[Dart-dev] [5905] DART/branches/development/utilities: add clean_nml - sorts namelists by order, reformats them to be
nancy at ucar.edu
nancy at ucar.edu
Mon Oct 29 10:39:52 MDT 2012
Revision: 5905
Author: nancy
Date: 2012-10-29 10:39:51 -0600 (Mon, 29 Oct 2012)
Log Message:
-----------
add clean_nml - sorts namelists by order, reformats them to be
consistent, culls all comments to bottom of file. also added
copyright to swabrestart util - should have had it all along
but apparently i never added it.
Modified Paths:
--------------
DART/branches/development/utilities/swabrestart.c
Added Paths:
-----------
DART/branches/development/utilities/clean_nml.c
-------------- next part --------------
Added: DART/branches/development/utilities/clean_nml.c
===================================================================
--- DART/branches/development/utilities/clean_nml.c (rev 0)
+++ DART/branches/development/utilities/clean_nml.c 2012-10-29 16:39:51 UTC (rev 5905)
@@ -0,0 +1,605 @@
+/* DART software - Copyright 2004 - 2011 UCAR. This open source software is
+ * provided by UCAR, "as is", without charge, subject to all terms of use at
+ * http://www.image.ucar.edu/DAReS/DART/DART_download
+ */
+
+/* <next few lines under version control, do not edit>
+ * $URL$
+ * $Id$
+ * $Revision$
+ * $Date$
+ */
+
+/*
+ * read in a fortran namelist on stdin, reformat and sort, then
+ * output on stdout. make spacing, commas, etc consistent.
+ * the goal is to make it easier to compare 2 different namelist
+ * files which have diverged in order of namelists inside the file,
+ * formatting, etc.
+ *
+ * nsc 18nov2011
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+/*
+ * an F90 namelist starts with &name and ends with a /
+ * the contents are formatted as 'name = value' pairs
+ * it is common for lines to end with comma but doesn't seem
+ * to be required. strings are enclosed in single or double quotes.
+ * lines outside the &name, / delimiters are ignored and often
+ * used as comments.
+ *
+ * separators seem to be commas, in which case multiple values
+ * can occur on the same line. arrays of values can seem to
+ * occur on multiple lines. bother.
+ *
+ * this program needs to read in multiple namelists (so it can
+ * sort them by name) and the contents, and the lines outside the
+ * namelists, and output them in a consistent spacing, order, etc
+ * so we can diff them easily.
+ *
+ * usually there are a few more prominent namelist names that
+ * should appear at the top of the file. first pass, just do
+ * them all alphabetically, but later on it would be nice to
+ * be able to indicate which ones should be at the top of the
+ * output file.
+ *
+ * it figures out the longest name and lines up the ='s
+ *
+ * comment lines (outside any namelist) are collected and all
+ * output at the end - kinda a pain since it removes them from
+ * the namelist they were about. but it does preserve them.
+ * it also calls tolower() so all text is lower case.
+ * i don't remember if i put in support yet for:
+ * &namelist name=value /
+ * (start, val, stop all on the same line) which is technically legal.
+ */
+
+/* fixed capacities of the tables declared below */
+#define MAXNMLS 200 /* namelists per input: size of nmll[] and sort_list[] */
+#define MAXENTRIES 1000 /* name=value items per namelist: size of nvp[] */
+#define MAXVALUES 2000 /* value pointers readin() allocates for each item */
+#define MAXCOMMENTS 1000 /* saved comment lines: size of comment[] */
+
+/* data structs */
+/* one item of a namelist: a name and the value strings read for it */
+struct nv_pairs {
+ char *name; /* item name as returned by splitme(), which lowercases it */
+ int nvalues; /* number of value[] entries in use */
+ char **value; /* value[0] comes from the name= line, the rest from continuation lines */
+};
+
+/* one namelist: its name and the items collected by readin() */
+struct nml {
+ char *name; /* text following the '&', lowercased by nmlstart() */
+ int nitems; /* number of nvp[] entries in use */
+ struct nv_pairs nvp[MAXENTRIES];
+};
+
+/* everything collected from stdin */
+struct nmllist {
+ struct nml nmll[MAXNMLS]; /* namelists, in the order they were read */
+ int nmllcount; /* number of nmll[] entries in use */
+ char *comment[MAXCOMMENTS]; /* non-blank lines found outside any namelist */
+ int commentcount; /* number of comment[] entries in use */
+ int sort_list[MAXNMLS]; /* indices into nmll[], ordered by name in do_sort() */
+};
+
+/* the single instance used by every routine; file scope, so it starts zeroed */
+struct nmllist l;
+
+/* make these long */
+#define MAXLINE 1024
+#define MAXTOKEN 1024
+
+/* linebuf holds the line currently being parsed; readin() fills it with fgets() */
+char linebuf[MAXLINE];
+/* NOTE(review): nbuf and vbuf are not referenced by any function in this file */
+char nbuf[MAXTOKEN];
+char vbuf[MAXTOKEN];
+
+/* top level steps, in the order main() runs them */
+void setup(void);
+void takedown(void);
+void readin(void);
+void do_sort(void);
+void writeout(int sortme);
+void printnml(struct nml *nl);
+
+/* line classification */
+int nmlstart(char *line, int linelen, char **name);
+int nmlend(char *line, int linelen);
+int onlyslash(char *line, int linelen);
+int emptyline(char *line, int linelen);
+
+/* tokenizing; splitme() and justvalue() return malloc'd strings */
+int splitme(char *line, int linelen, char **name, char **value);
+/* justvalue() is called from readin(); it was missing a declaration */
+int justvalue(char *line, int linelen, char **value);
+char *haschar(char *line, int linelen, char target);
+int longestname(struct nml *nl);
+int nextname(char *line, int linelen, int offset, int *start, int *end);
+int nextvalue(char *line, int linelen, int offset, int *start, int *end);
+
+/*
+ * filter: namelist text arrives on stdin and the cleaned up version
+ * goes to stdout.  any command line argument is treated as a mistake
+ * and answered with the usage message and a failing exit status.
+ */
+int main(int argc, char **argv)
+{
+    if (argc <= 1) {
+        setup();
+
+        /* parse stdin into the global tables (a new namelist for
+         * each & encountered, a new item for each name=value pair),
+         * work out the alphabetical order, then print.
+         */
+        readin();
+        do_sort();
+
+        /* set arg to 0 to avoid alphabetical sort */
+        writeout(1);
+
+        takedown();
+        exit(0);
+    }
+
+    fprintf(stderr, "usage: %s < stdin > stdout\n", argv[0]);
+    fprintf(stderr, " takes no arguments\n");
+    exit (-1);
+}
+
+
+/* first call made by main(); a placeholder that currently does nothing */
+void setup(void)
+{
+ /* allocate here? */
+}
+
+/* last call made by main() before exit; a placeholder that currently
+ * does nothing, so the strings allocated while reading are not freed here
+ */
+void takedown(void)
+{
+ /* deallocate here */
+}
+
+/* print a message and quit; used when a fixed table is full or
+ * memory runs out, instead of writing past the end of an array.
+ */
+static void readin_fatal(const char *what)
+{
+    fprintf(stderr, "clean_nml: %s\n", what);
+    exit (-1);
+}
+
+/*
+ * read stdin a line at a time and fill the global list 'l'.
+ *
+ *  - a line containing an unquoted & starts a new namelist
+ *  - inside a namelist, a line containing an unquoted = starts a
+ *    new item; a line without one adds another value string to the
+ *    most recent item (continuation line)
+ *  - a line containing an unquoted / ends the namelist
+ *  - non-blank lines outside any namelist are saved as comments
+ *
+ * blank and whitespace-only lines are skipped everywhere.  lines
+ * longer than the line buffer are seen as several shorter lines.
+ * exits with an error if any of the MAX* limits would be exceeded.
+ */
+void readin(void)
+{
+    char *name, *val, *copy;
+    int in_nml, linelen, action;
+    struct nv_pairs *nvp;
+    struct nml *n;
+
+    n = NULL;
+    nvp = NULL;  /* no item yet, so a stray continuation line is ignored */
+    in_nml = 0;
+    while (fgets(linebuf, sizeof(linebuf), stdin) != NULL) {
+        linelen = strlen(linebuf);
+        /* the length test guards a line that begins with a NUL byte */
+        if (linelen > 0 && linebuf[linelen-1] == '\n') {
+            linebuf[linelen-1] = '\0';
+            linelen--;
+        }
+        if (linelen <= 0) continue;
+
+        /* a whitespace-only line carries nothing.  skipping it here
+         * also keeps it away from onlyslash(), which would otherwise
+         * take it for the end of the namelist.
+         */
+        if (emptyline(linebuf, linelen)) continue;
+
+        /* action records whether this line has been accounted for */
+        action = 0;
+
+        if (!in_nml) {
+            /* not currently in namelist definition */
+            if (nmlstart(linebuf, linelen, &name) > 0) {
+                if (l.nmllcount >= MAXNMLS)
+                    readin_fatal("too many namelists, increase MAXNMLS");
+                n = &(l.nmll[l.nmllcount]);
+                l.nmllcount++;
+                n->name = name;
+                in_nml = 1;
+                action = 1;
+            } else {
+                /* comment line outside nmls - keep and output at
+                 * end of file.
+                 */
+                if (l.commentcount >= MAXCOMMENTS)
+                    readin_fatal("too many comment lines, increase MAXCOMMENTS");
+                copy = malloc(linelen+1);
+                if (copy == NULL)
+                    readin_fatal("out of memory");
+                memcpy(copy, linebuf, linelen);
+                copy[linelen] = '\0';
+                l.comment[l.commentcount] = copy;
+                l.commentcount++;
+            }
+        }
+
+        /* not an else: the line that opens a namelist may also
+         * carry the first item and even the closing slash.
+         */
+        if (in_nml) {
+            if (n == NULL) {
+                fprintf(stderr, "internal inconsistency: in_nml true, n null\n");
+                exit (-1);
+            }
+            /* definition of item */
+            if (splitme(linebuf, linelen, &name, &val) > 0) {
+                if (n->nitems >= MAXENTRIES)
+                    readin_fatal("too many items in a namelist, increase MAXENTRIES");
+                nvp = &(n->nvp[n->nitems]);
+                n->nitems++;
+                nvp->name = name;
+                nvp->value = malloc(MAXVALUES * sizeof(char *));
+                if (nvp->value == NULL)
+                    readin_fatal("out of memory");
+                nvp->value[0] = val;
+                nvp->nvalues = 1;
+                action = 1;
+            }
+            /* only a slash on a single line? */
+            if (onlyslash(linebuf, linelen) > 0) {
+                n = NULL;
+                nvp = NULL;
+                in_nml = 0;
+                action = 1;
+            }
+            /* continuation line - append to current name.  a line
+             * that yields no value text is dropped rather than
+             * stored as a null string.
+             */
+            if (action == 0 && nvp != NULL) {
+                if (justvalue(linebuf, linelen, &val) && val != NULL) {
+                    if (nvp->nvalues >= MAXVALUES)
+                        readin_fatal("too many values for one item, increase MAXVALUES");
+                    nvp->value[nvp->nvalues] = val;
+                    nvp->nvalues++;
+                }
+            }
+            /* trailing slash on same line as value? */
+            if (nmlend(linebuf, linelen) > 0) {
+                n = NULL;
+                nvp = NULL;
+                in_nml = 0;
+                action = 1;
+            }
+        }
+    }
+}
+
+/*
+ * fill l.sort_list with the indices of the namelists arranged so
+ * their names are in ascending strcmp() order.  the namelists
+ * themselves are not moved.  a plain bubble sort: one full sweep per
+ * namelist, swapping neighbours only when strictly out of order, so
+ * namelists with equal names keep their input order.
+ */
+void do_sort()
+{
+    int *order = l.sort_list;
+    int count = l.nmllcount;
+    int sweep, k, held;
+
+    /* begin with input order */
+    for (k = 0; k < count; k++)
+        order[k] = k;
+
+    for (sweep = count; sweep > 0; sweep--) {
+        for (k = 1; k < count; k++) {
+            const char *left = l.nmll[order[k-1]].name;
+            const char *right = l.nmll[order[k]].name;
+
+            if (strcmp(left, right) <= 0)
+                continue;
+
+            held = order[k-1];
+            order[k-1] = order[k];
+            order[k] = held;
+        }
+    }
+}
+
+
+/*
+ * print every namelist, then the saved comment lines, then two
+ * newlines.  a non-zero sortme prints the namelists in the order
+ * held in l.sort_list; zero prints them in input order.
+ */
+void writeout(int sortme)
+{
+    int k, which;
+
+    k = 0;
+    while (k < l.nmllcount) {
+        which = sortme ? l.sort_list[k] : k;
+        printnml(&l.nmll[which]);
+        k++;
+    }
+
+    k = 0;
+    while (k < l.commentcount) {
+        printf("%s\n", l.comment[k]);
+        k++;
+    }
+
+    printf("\n\n");
+}
+
+/*
+ * print one namelist: the &name line, one line per item with the
+ * = signs lined up under the longest item name, then / and two
+ * blank lines.  an item with several value strings prints the first
+ * one after the = and the others on following lines under a blank
+ * name column; every value string except the last gets a trailing
+ * comma.
+ *
+ * the column width is handed to printf through the * field width
+ * rather than by building format strings at run time.  an item
+ * whose name or value could not be parsed prints as empty text
+ * instead of passing a null pointer to %s.
+ *
+ * lcase the left name, lcase .true. and .false.?
+ */
+void printnml(struct nml *nl)
+{
+    int i, j, len;
+    struct nv_pairs *item;
+    const char *name, *val;
+
+    printf("&%s\n", nl->name);
+    len = longestname(nl);
+
+    for (i=0; i<nl->nitems; i++) {
+        item = &nl->nvp[i];
+        name = (item->name != NULL) ? item->name : "";
+        val = (item->nvalues > 0 && item->value[0] != NULL) ? item->value[0] : "";
+
+        if (item->nvalues > 1)
+            printf(" %-*s = %s,\n", len, name, val);
+        else
+            printf(" %-*s = %s\n", len, name, val);
+
+        for (j=1; j<item->nvalues; j++) {
+            val = (item->value[j] != NULL) ? item->value[j] : "";
+            if (j < item->nvalues-1)
+                printf(" %-*s %s,\n", len, "", val);
+            else
+                printf(" %-*s %s\n", len, "", val);
+        }
+    }
+    printf("/\n");
+    printf("\n\n");
+}
+
+/*
+ * does this line start a namelist?  looks for the first & that is
+ * not inside quotes.  if there is one, the characters after it, up
+ * to the next whitespace or the end of the line, are returned
+ * through *name as a freshly malloc'd, lowercased, NUL-terminated
+ * string and the function returns 1.  otherwise *name is left
+ * alone and the function returns 0.
+ */
+int nmlstart(char *line, int linelen, char **name)
+{
+    int i, len;
+    char *e, *first;
+
+    e = haschar(line, linelen, '&');
+    if (e == NULL)
+        return 0;
+
+    /* measure the name only once e is known to be valid */
+    first = e + 1;
+    len = 0;
+    while (first + len < line + linelen && !isspace((unsigned char)first[len]))
+        len++;
+
+    *name = malloc(len + 1);
+    if (*name == NULL) {
+        fprintf(stderr, "nmlstart: out of memory\n");
+        exit (-1);
+    }
+
+    /* copy and lowercase in one pass, then terminate: the name is
+     * later handed to strcmp() and printf("%s").
+     */
+    for (i=0; i<len; i++)
+        (*name)[i] = (char)tolower((unsigned char)first[i]);
+    (*name)[len] = '\0';
+
+    return 1;
+}
+
+/*
+ * does this line end a namelist?  returns 1 if haschar() finds
+ * a / on the line, 0 if it does not.  haschar() is the one that
+ * decides which slashes count (it passes over quoted text).
+ */
+int nmlend(char *line, int linelen)
+{
+    return haschar(line, linelen, '/') != NULL;
+}
+
+/*
+ * returns 1 when the first linelen characters of line are all
+ * either whitespace or slashes, 0 as soon as anything else is seen.
+ * note that no slash is actually required: an empty or
+ * whitespace-only line also returns 1.
+ */
+int onlyslash(char *line, int linelen)
+{
+    int k = 0;
+    char c;
+
+    while (k < linelen) {
+        c = line[k++];
+        if (c != '/' && !isspace(c))
+            return 0;
+    }
+
+    return 1;
+}
+
+/*
+ * returns 1 when the first linelen characters of line are all
+ * whitespace (which includes linelen of zero), 0 otherwise.
+ */
+int emptyline(char *line, int linelen)
+{
+    int k = 0;
+
+    /* step over leading whitespace; the line is empty exactly
+     * when that carries us to the end.
+     */
+    while (k < linelen && isspace(line[k]))
+        k++;
+
+    return (k >= linelen) ? 1 : 0;
+}
+
+/* return a malloc'd, NUL-terminated copy of line[first..last],
+ * lowercased when lower is non-zero.  exits if memory runs out.
+ */
+static char *splitme_copy(const char *line, int first, int last, int lower)
+{
+    int i, len;
+    char *out;
+
+    len = last - first + 1;
+    if (len < 0)
+        len = 0;
+
+    out = malloc(len + 1);
+    if (out == NULL) {
+        fprintf(stderr, "splitme: out of memory\n");
+        exit (-1);
+    }
+
+    for (i=0; i<len; i++) {
+        if (lower)
+            out[i] = (char)tolower((unsigned char)line[first+i]);
+        else
+            out[i] = line[first+i];
+    }
+    out[len] = '\0';
+
+    return out;
+}
+
+/*
+ * split a 'name = value' line.  returns 0, with *name and *value
+ * both NULL, when the line has no = outside of quotes.  otherwise
+ * returns 1 with:
+ *   *name  - lowercased copy of the first word on the line as found
+ *            by nextname(), or NULL if it found none
+ *   *value - copy of the text after the = as delimited by
+ *            nextvalue(), or NULL if it found none
+ * both strings are malloc'd and NUL-terminated.
+ */
+int splitme(char *line, int linelen, char **name, char **value)
+{
+    int startc, endc;
+    char *e;
+
+    *name = NULL;
+    *value = NULL;
+
+    e = haschar(line, linelen, '=');
+    if (e == NULL)
+        return 0;
+
+    /* FIXME: this should start at =, work back, skip any
+     * initial whitespace, then count chars until you get
+     * whitespace again. &nmlname nam=val /
+     * is unusual but technically a valid namelist and
+     * this code doesn't handle it right (yet).
+     */
+    if (nextname(line, linelen, 0, &startc, &endc))
+        *name = splitme_copy(line, startc, endc, 1);
+
+    /* the value search covers only what follows the = */
+    if (nextvalue(line, linelen, (int)(e - line) + 1, &startc, &endc))
+        *value = splitme_copy(line, startc, endc, 0);
+
+    return 1;
+}
+
+/*
+ * find the first word at or after line[offset]: skip whitespace,
+ * then run up to (not including) the next whitespace or =.
+ * on success returns 1 with *start and *end set to the indices of
+ * the first and last characters of the word.  returns 0, leaving
+ * *start and *end untouched, if there is nothing but whitespace or
+ * if the word runs to the end of the line without reaching a
+ * whitespace or = delimiter.
+ */
+int nextname(char *line, int linelen, int offset,
+             int *start, int *end)
+{
+    int first, last;
+
+    if (offset >= linelen)
+        return 0;
+
+    first = offset;
+    while (first < linelen && isspace((unsigned char)line[first]))
+        first++;
+    if (first >= linelen)
+        return 0;
+
+    /* one forward pass is enough: if no delimiter follows the first
+     * non-blank character, none follows any later character either.
+     */
+    for (last = first+1; last < linelen; last++) {
+        if (line[last] == '=' || isspace((unsigned char)line[last])) {
+            *start = first;
+            *end = last-1;
+            return 1;
+        }
+    }
+
+    return 0;
+}
+
+/*
+ * delimit the value text in line[offset .. linelen-1].
+ *
+ * trailing whitespace, commas and slashes are dropped first.  the
+ * rest is scanned right to left: *end is set once, at the last
+ * character kept, and *start is moved left each time the beginning
+ * of a word or of a quoted string is passed, so the final span
+ * covers everything from the first word to the last with the
+ * blanks around it trimmed off.  text between a pair of matching
+ * quotes is passed over whole, and the other kind of quote inside
+ * it is ordinary text.
+ *
+ * returns 1 with *start and *end set to the indices of the first
+ * and last characters, or 0 if nothing was found (then *start and
+ * *end are -1, unless offset >= linelen in which case they are not
+ * touched).
+ */
+int nextvalue(char *line, int linelen, int offset,
+              int *start, int *end)
+{
+    int i;
+    int in_value;
+    char c, quote;
+
+    if (offset >= linelen)
+        return 0;
+
+    /* back up over trailing separators */
+    for (i=linelen-1; i>=offset; --i) {
+        c = line[i];
+        if (isspace((unsigned char)c)) continue;
+        if (c == ',') continue;
+        if (c == '/') continue;
+        break;
+    }
+
+    *start = -1;
+    *end = -1;
+    quote = '\0';  /* the quote character we are inside of, if any */
+    in_value = 0;
+    /* leave i alone and start there */
+    for ( ; i>=offset; --i) {
+        c = line[i];
+
+        if (quote != '\0') {
+            /* only the same kind of quote closes the string */
+            if (c != quote) continue;
+            quote = '\0';
+            if (in_value) {
+                *start = i;
+                in_value = 0;
+            }
+            continue;
+        }
+        if (c == '\'' || c == '"') {
+            quote = c;
+            if (!in_value) {
+                if (*end < 0)
+                    *end = i;
+                in_value = 1;
+            }
+            continue;
+        }
+
+        if (!in_value) {
+            if (isspace((unsigned char)c)) continue;
+            in_value = 1;
+            if (*end < 0)
+                *end = i;
+        } else {
+            if (!isspace((unsigned char)c)) continue;
+            in_value = 0;
+            *start = i+1;
+        }
+    }
+
+    /* still inside a word when we hit offset: it begins there */
+    if (in_value) {
+        *start = offset;
+        return 1;
+    }
+
+    /* index 0 is a legal position, so compare against >= 0 */
+    if (*start >= 0 && *end >= 0)
+        return 1;
+    else
+        return 0;
+}
+
+/*
+ * return a pointer to the first occurrence of target among the
+ * first linelen characters of line, not counting anything between
+ * a pair of matching single or double quotes.  returns NULL if
+ * there is none.
+ *
+ * inside a quoted string only the same kind of quote closes it, so
+ * an apostrophe within "..." (or a " within '...') is plain text.
+ * the quote characters themselves are never reported as a match.
+ */
+char *haschar(char *line, int linelen, char target)
+{
+    int i;
+    char c, quote;
+
+    quote = '\0';  /* the quote character we are inside of, if any */
+    for (i=0; i<linelen; i++) {
+        c = line[i];
+
+        if (quote != '\0') {
+            if (c == quote)
+                quote = '\0';
+            continue;
+        }
+        if (c == '\'' || c == '"') {
+            quote = c;
+            continue;
+        }
+        if (c == target)
+            return line+i;
+    }
+
+    return NULL;
+}
+
+/*
+ * pull the value text out of a continuation line (a line inside a
+ * namelist that has no 'name =' part).  the whole line is handed
+ * to nextvalue().  on success returns 1 and sets *value to a
+ * malloc'd, NUL-terminated copy of the text it delimited.  returns
+ * 0 with *value set to NULL when the line holds no value text, so
+ * the caller never receives a null string to store.
+ */
+int justvalue(char *line, int linelen, char **value)
+{
+    int len, startc, endc;
+
+    *value = NULL;
+
+    if (!nextvalue(line, linelen, 0, &startc, &endc))
+        return 0;
+
+    len = endc - startc + 1;
+    if (len < 0)
+        len = 0;
+
+    *value = malloc(len + 1);
+    if (*value == NULL) {
+        fprintf(stderr, "justvalue: out of memory\n");
+        exit (-1);
+    }
+    memcpy(*value, line+startc, len);
+    (*value)[len] = '\0';
+
+    return 1;
+}
+
+/*
+ * return the length of the longest item name in a namelist, or 0
+ * if it has no items.  an item whose name is NULL (splitme() found
+ * an = but no name in front of it) is skipped.
+ */
+int longestname(struct nml *nl)
+{
+    int i, len;
+    int longest = 0;
+
+    for (i=0; i<nl->nitems; i++) {
+        if (nl->nvp[i].name == NULL)
+            continue;
+
+        len = (int)strlen(nl->nvp[i].name);
+        if (len > longest)
+            longest = len;
+    }
+
+    return longest;
+}
+
Property changes on: DART/branches/development/utilities/clean_nml.c
___________________________________________________________________
Added: svn:mime-type
+ text/plain
Added: svn:keywords
+ Date Rev Author HeadURL Id
Added: svn:eol-style
+ native
Modified: DART/branches/development/utilities/swabrestart.c
===================================================================
--- DART/branches/development/utilities/swabrestart.c 2012-10-24 19:51:55 UTC (rev 5904)
+++ DART/branches/development/utilities/swabrestart.c 2012-10-29 16:39:51 UTC (rev 5905)
@@ -1,3 +1,15 @@
+/* DART software - Copyright 2004 - 2011 UCAR. This open source software is
+ * provided by UCAR, "as is", without charge, subject to all terms of use at
+ * http://www.image.ucar.edu/DAReS/DART/DART_download
+ */
+
+/* <next few lines under version control, do not edit>
+ * $URL$
+ * $Id$
+ * $Revision$
+ * $Date$
+ */
+
/*
* swap bytes in a binary/unformatted DART restart file.
* (swab == the machine instruction for swap bytes)
More information about the Dart-dev
mailing list