/* randomize: A simple IPv4 number randomizer which replaces every found IP number in a file by
   another random number of the same length and same class/block (public, 10.*.*.*,
   192.168.*.*, ...), so the file length is unchanged. Access and modification time of the file
   are also unchanged.
   Useful e. g. for randomizing test persons data, logging messages without the original IP
   numbers e. g. for debugging, for legal logfiles with no original IP numbers
   (http://www.golem.de/0710/55076.html), etc..
   Tested under Linux, should work with every POSIX-conforming OS and Cygwin.

   Todo:
   - using getopt for options
   - performance tuning
   - option 6 for replacing IPv6 numbers
   - options for special handling of broadcast and net numbers; e. g. 192.168.1.255 and 0.0.0.0
   - option l for replacing only the last bits of the IP-Nr.
   - option M for replacing MACs (hardware addresses)
   - option n for hostname anonymisation
   - handling of leading zeros and whitespaces in IP numbers; handling of other notations than
     the dotted decimal
   - option f for replacing public IP numbers by others, which are NOT from
   - option p for replacing all IP numbers by random public ones. Useful for logged default IP
     numbers like 127.0.0.1

   Dr. Rolf Freitag

   2008-02-19 First quick hack which works with every test file and with optimized bitmasking
              for random IP numbers, V 0.1
   2008-02-20 Eliminated wrong bitmasks for some IPv4 blocks, works with a log file of about
              100 MB and 1 M IP numbers, V 0.2
   2008-03-01 Works with dozens of log files of about 1 GB, added a global flag for graceful
              exit, V 0.3

 * ----------------------------------------------------------------------------
 * "THE BEERWARE LICENSE" (Revision 44):
 * Dr. Rolf Freitag (rolf dot freitag at email dot de) wrote this file.
 * As long as you retain this notice you can do whatever
 * the GPL (GNU Public License version 3) allows with this stuff.
 * If you think this stuff is worth it, you can send me money via
 * paypal, and get a contribution receipt if you wish, or if we meet some day
 * you can buy me a beer in return.
 * ----------------------------------------------------------------------------
 */

// NOTE(review): the header names were lost when this file was mangled; the list below is
// reconstructed from the identifiers the code uses - confirm against the original file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <errno.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>             // signals
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <utime.h>
#include <ctype.h>
#include <iso646.h>             // and, or, not, bitand, ...
#include <limits.h>             // UCHAR_MAX

// define for simple debugging
//#define DEBUG

#ifndef __FUNCTION__
# define __FUNCTION__ __func__
#endif

// print basic debug info (compiler version, build date, language level); no-op without DEBUG
#if defined(DeBuG) || defined(DEBUG)
# define mc_DEBUGSTART \
  do \
    { \
      (void) fprintf (stderr, "mc_DEBUGSTART: __VERSION__: %s, __DATE__: %s, __TIME__: %s\n", \
                      __VERSION__, __DATE__, __TIME__); \
      (void) fprintf (stderr, " __STDC__: %d, __STDC_HOSTED__: %d, __STDC_IEC_559__: %d.\n", \
                      __STDC__, __STDC_HOSTED__, __STDC_IEC_559__); \
    } \
  while (0)
#else
# define mc_DEBUGSTART ((void) 0)
#endif

// print debug info: printf-like, prefixed with file, function and line; no-op without DEBUG.
// The first argument must be the format string. Written with standard C99 __VA_ARGS__ and
// wrapped in do/while (0) so that it behaves like one statement everywhere.
#if defined(DeBuG) || defined(DEBUG)
# define mc_DEBUG(...) \
  do \
    { \
      (void) fprintf (stderr, "mc_DEBUG: \"%s\", \"%s\", line %d: ", \
                      __FILE__, __FUNCTION__, __LINE__); \
      (void) fprintf (stderr, __VA_ARGS__); \
    } \
  while (0)
#else
# define mc_DEBUG(...) ((void) 0)
#endif

// simple version number
#define SOFTWARE_VERSION_NUMBER 0.3

// macros for conversion from/to dotted decimal notation.
// Every argument is parenthesized and converted to uint32_t first, so that expressions can be
// passed safely and e. g. 223 << 24 is not evaluated as an overflowing signed int shift.
#define mc_MERGE_IP(i3,i2,i1,i0) \
  (((uint32_t) (i3) << 24) + ((uint32_t) (i2) << 16) + ((uint32_t) (i1) << 8) + (uint32_t) (i0))
#define mc_SPLIT_IP0(i) ((uint32_t) (i) bitand 0xffu)
#define mc_SPLIT_IP1(i) (((uint32_t) (i) bitand 0xff00u) >> 8)
#define mc_SPLIT_IP2(i) (((uint32_t) (i) bitand 0xff0000u) >> 16)
#define mc_SPLIT_IP3(i) (((uint32_t) (i) bitand 0xff000000u) >> 24)

// reverse (mirror) the bits of the u32 lvalue n in place; n is evaluated several times
#define mc_REVERSE(n) \
  do \
    { \
      (n) = (((n) >> 1) & 0x55555555u) | (((n) << 1) & 0xaaaaaaaau); \
      (n) = (((n) >> 2) & 0x33333333u) | (((n) << 2) & 0xccccccccu); \
      (n) = (((n) >> 4) & 0x0f0f0f0fu) | (((n) << 4) & 0xf0f0f0f0u); \
      (n) = (((n) >> 8) & 0x00ff00ffu) | (((n) << 8) & 0xff00ff00u); \
      (n) = (((n) >> 16) & 0x0000ffffu) | (((n) << 16) & 0xffff0000u); \
    } \
  while (0)

// easter egg; expects argc and argv to be in scope
#define mc_ADVICE \
  do \
    { \
      if (argc > 1 && 0 == strncmp (argv[1], "-advice", 8)) \
        { \
          (void) printf ("Don't Panic!\n"); \
          exit (42);            /* 42: The meaning of life, the universe, and everything ;-) */ \
        } \
    } \
  while (0)

static uint32_t u32_rand;       // for storing an internal random number between calls of u32rand ()
volatile sig_atomic_t i_exit_flag = 0;  // flag for exiting soon (not immediately) and controlled after a signal to terminate

// Simple signal handler.
// SIGINT, SIGILL, SIGKILL, SIGSEGV and SIGTERM set i_exit_flag so that the main loop can finish
// in a controlled way. Every other signal is reported on standard output and otherwise ignored.
// Only async-signal-safe operations are used: the flag is a volatile sig_atomic_t and the
// message is assembled by hand and emitted with write () instead of printf (); errno is
// preserved for the interrupted code.
void
sig_handler (int sig)
{
  if ((SIGINT == sig) or (SIGILL == sig) or (SIGKILL == sig) or (SIGSEGV == sig) or (SIGTERM == sig))
    {
      i_exit_flag = 1;          // set the flag for a graceful exit
      return;
    }

  {
    static const char ca_head[] = "Signal ";
    static const char ca_tail[] = " - ignoring ... \r\n\a";
    const int i_saved_errno = errno;
    char ca_number[16];
    size_t st_pos = sizeof (ca_number);
    unsigned int u_sig = (sig < 0) ? (0u - (unsigned int) sig) : (unsigned int) sig;
    ssize_t sst_written;

    // decimal conversion, filled in from the end of the buffer
    do
      {
        ca_number[--st_pos] = (char) ('0' + (int) (u_sig % 10u));
        u_sig /= 10u;
      }
    while (u_sig not_eq 0u);
    if (sig < 0)
      ca_number[--st_pos] = '-';

    sst_written = write (STDOUT_FILENO, ca_head, sizeof (ca_head) - 1);
    sst_written = write (STDOUT_FILENO, ca_number + st_pos, sizeof (ca_number) - st_pos);
    sst_written = write (STDOUT_FILENO, ca_tail, sizeof (ca_tail) - 1);
    (void) sst_written;         // nothing sensible can be done about a failed write here
    errno = i_saved_errno;
  }
}                               // sig_handler

// Install sig_handler for the signal numbers 1 ... UCHAR_MAX.
// Left at their default action are SIGKILL and SIGSTOP (they cannot be caught) and the
// hardware fault signals SIGSEGV, SIGILL, SIGFPE and SIGBUS: returning from a handler for such
// a fault restarts the faulting instruction, so the process would spin instead of exiting.
// Numbers which are no valid signals are rejected by signal () and simply skipped.
static void
register_sig_handlers (void)
{
  int i_sig;

  for (i_sig = 1; i_sig <= UCHAR_MAX; i_sig++)
    {
      if ((SIGKILL == i_sig) or (SIGSTOP == i_sig))
        continue;
      if ((SIGSEGV == i_sig) or (SIGILL == i_sig) or (SIGFPE == i_sig))
        continue;
#ifdef SIGBUS
      if (SIGBUS == i_sig)
        continue;
#endif
      (void) signal (i_sig, sig_handler);
    }
}                               // register_sig_handlers

// For registering the signal handler void sig_handler (int sig); used as a statement.
#define mc_register_sig_handler register_sig_handlers ()

// IPv4 number classes (private +reserved +multicast +broadcast +...).
See also Wikipedia and CIDR address blocks and // http://iana.org/assignments/ipv4-address-space/ int class (uint32_t u32) { const int i_ret = -1; // default class: public IP number // classify if (u32 <= mc_MERGE_IP (0, 255, 255, 255)) return (1); // first private IP number class, range 0.0.0.0 - 0.255.255.255 if ((u32 >= mc_MERGE_IP (10, 0, 0, 0)) and (u32 <= mc_MERGE_IP (10, 255, 255, 255))) return (2); // second private IP number class if ((u32 >= mc_MERGE_IP (39, 0, 0, 0)) and (u32 <= mc_MERGE_IP (39, 255, 255, 255))) return (3); if ((u32 >= mc_MERGE_IP (127, 0, 0, 0)) and (u32 <= mc_MERGE_IP (127, 255, 255, 255))) return (4); if ((u32 >= mc_MERGE_IP (128, 0, 0, 0)) and (u32 <= mc_MERGE_IP (128, 0, 255, 255))) return (5); if ((u32 >= mc_MERGE_IP (169, 254, 0, 0)) and (u32 <= mc_MERGE_IP (169, 254, 255, 255))) return (6); if ((u32 >= mc_MERGE_IP (172, 16, 0, 0)) and (u32 <= mc_MERGE_IP (172, 31, 255, 255))) return (7); if ((u32 >= mc_MERGE_IP (191, 255, 0, 0)) and (u32 <= mc_MERGE_IP (191, 255, 255, 255))) return (8); if ((u32 >= mc_MERGE_IP (192, 0, 0, 0)) and (u32 <= mc_MERGE_IP (192, 0, 0, 255))) return (9); if ((u32 >= mc_MERGE_IP (192, 0, 2, 0)) and (u32 <= mc_MERGE_IP (192, 0, 2, 255))) return (10); if ((u32 >= mc_MERGE_IP (192, 168, 0, 0)) and (u32 <= mc_MERGE_IP (192, 168, 255, 255))) return (11); if ((u32 >= mc_MERGE_IP (198, 18, 0, 0)) and (u32 <= mc_MERGE_IP (198, 18, 127, 255))) return (12); if ((u32 >= mc_MERGE_IP (223, 255, 255, 0)) and (u32 <= mc_MERGE_IP (223, 255, 255, 255))) return (13); mc_DEBUG ("class %d\n", i_ret); return (i_ret); // return default } // class // a simple random number generator uint32_t u32rand (void) { uint32_t u32; int ifd = open ("/dev/urandom", O_RDONLY); if (-1 == ifd) { mc_DEBUG ("/dev/urandom could not be opened!!!\n"); u32 ^= (uint32_t) time (NULL) - RAND_MAX + (uint32_t) rand (); // fallback/failsafe random number } else { (void) read (ifd, &u32, 4); (void) close (ifd); } u32 ^= u32_rand; // mix with an old value 
u32_rand = u32; // store the actual value mc_REVERSE (u32_rand); // shuffle the stored value mc_DEBUG ("u32rand %u\n", u32_rand); return (u32); } // u32rand // return the length of the positive decimal number (without leading zeros etc.) int number_length (uint32_t u32) { int i_ret = 0; do { u32 /= 10; i_ret++; } while (u32); // mc_DEBUG ("length %d\n", i_ret); return (i_ret); } // number_length // return the length of the IPv4 number in dotted decimal notation, without leading zeros int ipv4_number_length (uint32_t u32) { int i_ret = number_length (mc_SPLIT_IP0 (u32)); i_ret += number_length (mc_SPLIT_IP1 (u32)); i_ret += number_length (mc_SPLIT_IP2 (u32)); i_ret += number_length (mc_SPLIT_IP3 (u32)); mc_DEBUG ("IPv4 number length %d\n", i_ret + 3); return (i_ret + 3); // length of the four bytes in decimal notation plus three dots } // ipv4_number_length // return a random IPv4 number of the same class which is different uint32_t randomize_ip (const uint32_t u32_in) { int i = 0, i_deadline_counter = 0, i_class = class (u32_in); const int i_length = ipv4_number_length (u32_in); uint32_t u32 = 0; mc_DEBUG ("i_class %d\n", i_class); switch (i_class) { case -1: // (other) public IPV4 number, > 0.255.255.255, <= 255.255.255.255 +not private ... 
for (; (-1 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (7 == i_length) // very short number; maximum 9.9.9.9 { u32 and_eq 0x0f0f0f0f; // bitmask 15.15.15.15 } else { if (8 == i_length) // short number; maximum 99.9.9.9 u32 and_eq 0x7f7f7f7f; // bitmask 127.127.127.127 } i = class (u32); mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class } break; case 1: // 0.0.0.0 - 0.255.255.255 for (; (1 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (7 == i_length) // very short number; maximum 0.9.9.9 { u32 and_eq 0x000f0f0f; // set MSB and highest nibbles to 0 (maximum 0.15.15.15) } else { if (8 == i_length) // short number; maximum 0.99.9.9 u32 and_eq 0x007f7f7f; // set MSB and next MSbs to 0 (maximum 0.127.127.127) else u32 and_eq 0x00ffffff; // set MSB to 0 } mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class else i = 1; } break; case 2: // 10.0.0.0 - 10.255.255.255 for (; (2 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (8 == i_length) // very short number; maximum 10.9.9.9 { u32 and_eq 0x000f0f0f; // bitmask 0.15.15.15 } else { if (9 == i_length) // short number; maximum 10.99.9.9 u32 and_eq 0x007f7f7f; // bitmask 0.127.127.127 else u32 and_eq 0x00ffffff; // set MSB to 0 } u32 or_eq 0x0A000000; // set MSB to 10 mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class else i = 2; } break; case 3: // 39.0.0.0 - 39.255.255.255 for (; (3 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (8 == i_length) // very short number; maximum 39.9.9.9 { u32 and_eq 0x000f0f0f; // bitmask 0.15.15.15 } else { 
if (9 == i_length) // short number; maximum 39.99.9.9 u32 and_eq 0x007f7f7f; // bitmask 0.127.127.127 else u32 and_eq 0x00ffffff; // set MSB to 0 } u32 or_eq 0x27000000; // set MSB to 39 mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class else i = 3; } break; case 4: // 127.0.0.0 - 127.255.255.255 for (; (4 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (9 == i_length) // very short number; maximum 127.9.9.9 { u32 and_eq 0x000f0f0f; // bitmask 0.15.15.15 } else { if (10 == i_length) // short number; maximum 127.99.9.9 u32 and_eq 0x007f7f7f; // bitmask 0.127.127.127 else u32 and_eq 0x00ffffff; // set MSB to 0 } u32 or_eq 0x7f000000; // set MSB to 127 i = class (u32); mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class else i = 4; } break; case 5: // 128.0.0.0 - 128.0.255.255 for (; (5 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (9 == i_length) // very short number; maximum 128.0.9.9 { u32 and_eq 0x00000f0f; // bitmask 0.0.15.15 } else { u32 and_eq 0x0000ffff; // clear constant bits } u32 or_eq 0x80000000; // set MSB to 128, set Byte 2 to 0 i = class (u32); mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class else i = 5; } break; case 6: // 169.254.0.0 - 169.254.255.255 for (; (6 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (11 == i_length) // short number; maximum 169.254.9.9 { u32 and_eq 0x00000f0f; // bitmask 0.0.15.15 } else { u32 and_eq 0x0000ffff; // clear constant bits } u32 or_eq 0xa9fe0000; // set MSB to 169, set Byte 2 to 254 mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) 
// same number i = 0; // invalid class else i = 6; } break; case 7: // 172.16.0.0 - 172.31.255.255 for (; (7 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (10 == i_length) // short number; maximum 172.16.9.9 { u32 and_eq 0x000f0f0f; // bitmask 0.15.15.15 } else { u32 and_eq 0x000fffff; // clear constant bits } u32 or_eq 0xac100000; // set 172.16+x.*.* mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class else i = 7; } break; case 8: // 191.255.0.0 - 191.255.255.255 for (; (8 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (11 == i_length) // short number; maximum 191.255.9.9 { u32 and_eq 0x00000f0f; // bitmask 0.0.15.15 } else { u32 and_eq 0x0000ffff; // clear constant bits } u32 or_eq 0xbfff0000; // set MSB to 172, set Byte 2 to 0x1* mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class else i = 8; } break; case 9: // 192.0.0.0 - 192.0.0.255 for (; (9 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (9 == i_length) // short number; maximum 192.0.0.9 { u32 and_eq 0x0000000f; // bitmask 0.0.0.15 } else { u32 and_eq 0x000000ff; // clear constant bits } u32 or_eq 0xc0000000; // set 192.0.0.* mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class else i = 9; } break; case 10: // 192.0.2.0 - 192.0.2.255 for (; (10 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (9 == i_length) // short number; maximum 192.0.2.9 { u32 and_eq 0x0000000f; // bitmask 0.0.0.15 } else { u32 and_eq 0x000000ff; // clear constant bits } u32 or_eq 0xc0000200; // set 192.0.2.* mc_DEBUG ("u32 %u, class %d, i_deadline_counter 
%d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class else i = 10; } break; case 11: // 192.168.0.0 - 192.168.255.255 for (; (11 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (i_length == 11) // short number, maximum 192.168.9.9 { u32 and_eq 0x00000f0f; // bitmask for 0.0.15.15 } else { u32 and_eq 0x0000ffff; // clear constant bits } u32 or_eq 0xc0a80000; // set 192.168.*.* mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class else i = 11; } break; case 12: // 198.18.0.0 - 198.18.127.255 for (; (12 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (i_length == 10) // short number, maximum 198.18.9.9 { u32 and_eq 0x00000f0f; // bitmask for 0.0.15.15 } else { u32 and_eq 0x00007fff; // clear constant bits } u32 or_eq 0xc6120000; // set 198.18.*.* mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class else i = 12; } break; case 13: // 223.255.255.0 - 223.255.255.255 for (; (13 not_eq i) or (ipv4_number_length (u32) not_eq i_length); i_deadline_counter++) { u32 = u32rand (); if (13 == i_length) // short number; maximum 223.255.255.9 { u32 and_eq 0x0000000f; // bitmask 63.15.15.15 } else { u32 and_eq 0x000000ff; // clear constant bits } u32 or_eq 0xdfffff00; // set the two highest bits (for 223.255.255.255 ... 
255.255.255.255) mc_DEBUG ("u32 %u, class %d, i_deadline_counter %d\n", u32, i, i_deadline_counter); if (u32_in == u32) // same number i = 0; // invalid class else i = 13; } break; default: (void) fprintf (stderr, "invalid class %d\n", i_class); u32 = u32rand (); break; } // switch mc_DEBUG ("randomized number: %u\n", u32); return (u32); } // randomize_ip int main (const int argc, const char *const argv[]) { int i = 0, i_counter = 0; uint32_t u32 = 0, ui1 = 0, ui2 = 0, ui3 = 0, ui4 = 0; // char ca_file_name[FILENAME_MAX] = { '\0' }; char c_trash[123] = { '\0' }; char ca_in_line[123] = { 0 }; FILE *ifp = NULL; // (input) file pointer struct stat sa; struct utimbuf sutimb; mc_DEBUGSTART; // print basic debug info (if debug mode is set) mc_ADVICE; // easteregg mc_register_sig_handler; // register the signal handler // init rand u32_rand = 0x42 + (unsigned int) getpid () + (unsigned int) getgid () + (unsigned int) geteuid () + (unsigned int) time (NULL); srand (u32_rand); // a very simple check of the arguments if (2 not_eq argc) { (void) fprintf (stdout, "%s version %.1f\n", argv[0], SOFTWARE_VERSION_NUMBER); exit (-1); } if (argc == 2) // second argument: input/output file { (void) snprintf (ca_file_name, sizeof (ca_file_name), argv[1]); } // check the file type and cache the meta data if (-1 == lstat (ca_file_name, &sa) or ! 
S_ISREG (sa.st_mode)) { (void) fprintf (stderr, "lstat(%s) failed\n", ca_file_name); perror ("lstat"); return (-1); } sutimb.actime = sa.st_atime; // time of last file access sutimb.modtime = sa.st_mtime; // time of last file modification // open the file ifp = fopen (ca_file_name, "r+"); mc_DEBUG ("opened file %s\n", ca_file_name); if (NULL == ifp) // no input { (void) fprintf (stderr, "Can not open %s, exiting.\n", ca_file_name); exit (-1); } for (i_counter = 0; (not feof (ifp)) and (not i_exit_flag); i_counter++) // till file end or no terminating signal { // start the loop with scanning // go to the actual byte in the file (void) fseek (ifp, i_counter, SEEK_SET); // read (up to) 15 byte = maxumum IPv4 number length in dotted decimal notation i = fread (ca_in_line, 1, 15, ifp); if (i < 7) // if not enough bytes could be read; file end break; if (not isdigit (ca_in_line[0])) // the first byte is not a digit continue; // next try (loop) // now the first byte is a digit; look for an IPv4 number i = sscanf (ca_in_line, "%u.%u.%u.%u", &ui1, &ui2, &ui3, &ui4); // try to read the IP +other bytes if (4 not_eq i) // no success { i = sscanf (ca_in_line, "%u.%u.%u.%u%s", &ui1, &ui2, &ui3, &ui4, c_trash); // try to read the IP +other bytes if (5 not_eq i) { continue; // no IPv4 number found; next try (loop) } } mc_DEBUG ("found IPv4 number %u.%u.%u.%u\n", ui1, ui2, ui3, ui4); // now an IPv4 number has been found if ((ui1 < 256) and (ui2 < 256) and (ui3 < 256) and (ui4 < 256)) // valid IPv4 number? 
{ // replace the IP number u32 = randomize_ip (mc_MERGE_IP (ui1, ui2, ui3, ui4)); // randomize // decompose ui1 = mc_SPLIT_IP3 (u32); ui2 = mc_SPLIT_IP2 (u32); ui3 = mc_SPLIT_IP1 (u32); ui4 = mc_SPLIT_IP0 (u32); // go to the actual byte in the file (void) fseek (ifp, i_counter, SEEK_SET); (void) fprintf (ifp, "%u.%u.%u.%u", ui1, ui2, ui3, ui4); // replace the old IPv4 number by overwriting mc_DEBUG ("replaced by %u.%u.%u.%u\n", ui1, ui2, ui3, ui4); // set the "pointer" in the file behind the last byte of the actual IPv4 number i_counter += ipv4_number_length (u32); } } // for (i_counter=0; ... // cleanup: flush caches, close files, set old timestamps (void) fflush (stdout); (void) fflush (ifp); if (ifp != NULL) (void) fclose (ifp); // preserve access and modification time of the file if (-1 == utime (ca_file_name, &sutimb)) { (void) fprintf (stderr, "utime(%s, ...) failed\n", ca_file_name); perror ("utime"); return (-1); } return (0); } // main