[Vari] Contest 4: DNA - Pagina 6

gugoXX · 09-08-2008, 23:57

Rilancio di nuovo

Quote:

Maximum common length 29
Maximum substring indexes 50002,70002
Substring1: ACTGTCCTGAAGATCGCTTGGCATCTCCG
Substring2: ACTGTCCTGCAGATCGCTTTGCATCTCCG
257ms

Con questo codice.

Codice:

static void Main(string[] args)
{
    string input1 ="";
    string input2 = "";
   
    input1 = File.ReadAllText(@"C:\temp\DNA1.txt");
    input2 = File.ReadAllText(@"C:\temp\DNA2.txt");
                
    Stopwatch sw = new Stopwatch();
    sw.Start();            
    Run Current = TryV5.Get(input1, input2, 2);
    sw.Stop();

    Console.WriteLine("Maximum common length {0}", Current.length);
    Console.WriteLine("Maximum substring indexes {0},{1}", Current.index0, Current.index1);

    string s0 = input1.Substring(Current.index0, Current.length);
    string s1 = input2.Substring(Current.index1, Current.length);

    Console.Write("Substring1: ");
    for (int t = 0; t < Current.length; t++)
    {
        if (s0[t] == s1[t]) Console.ForegroundColor = ConsoleColor.White;
        else Console.ForegroundColor = ConsoleColor.Red;
        Console.Write(s0[t]);
    }
    Console.WriteLine();
    Console.Write("Substring2: ");
    for (int t = 0; t < Current.length; t++)
    {
        if (s0[t] == s1[t]) Console.ForegroundColor = ConsoleColor.White;
        else Console.ForegroundColor = ConsoleColor.Red;
        Console.Write(s1[t]);
    }
    Console.WriteLine();
    
    Console.WriteLine("{0}ms",sw.ElapsedMilliseconds);
    Console.ReadKey();
}        

public static class TryV5
{
    static string input1;
    static string input2;
    static int len1;
    static int len2;
    static int ERR;                        

    public static Run Get(string i_input1, string i_input2, int i_ERR)
    {
        input1 = i_input1;
        input2 = i_input2;
        if (input2.Length > input1.Length)
        {
            string tmp = input1;
            input1 = input2;
            input2 = tmp;
        }
       
        ERR = i_ERR;

        len1 = input1.Length;
        len2 = input2.Length;

        var Upper = new Dictionary<string, List<int>>[100];
        var Lower = new Dictionary<string, List<int>>[100];
        
        int length = (int)(Math.Log(len1, 4));
        //int length = 6;
        bool stay;
        do
        {
            length++;                                        
            var mup = Upper[length] = BuildDict(input1, length);
            var mdw = Lower[length] = BuildDict(input2, length);
            
            var tst = from m in mup
                      join n in mdw on m.Key equals n.Key
                      select true;

            int lntest = Math.Min(mup.Count, 1<<19);                    
            int take = tst.Take(lntest).Count();
            stay = (take == lntest);
            if (take == 0) length -= 2;                   
        } while (stay);

        Run Winner = new Run();
        int minlen = GetMinLen(length);
        
        for(int ln=length;ln>=minlen;ln--)
        {
            var mup = Upper[ln];
            if (mup == null) mup = BuildDict(input1, ln);
            var mdw = Lower[ln];
            if (mdw == null) mdw = BuildDict(input2, ln);

            var tst = from m in mup
                      join n in mdw on m.Key equals n.Key
                      from u in m.Value
                      from d in n.Value
                      select Search(u, d, ln);
                               
            Run best = tst.Max();
            if (best.length > Winner.length)
            {
                Winner = best;
                minlen = GetMinLen(best.length);
                if (minlen > ln) break;
            }                    
        }
        return Winner;
    }            

    public static int GetMinLen(int curmaxlen)
    {
        int minlen = (int)Math.Ceiling((float)(curmaxlen) / (float)(ERR+1));
        return minlen;
    }              
    
    private static Run Search(int upoffset, int downoffset,int sure)
    {                
        int[] Pre = new int[ERR+1];
        int[] Post = new int[ERR+2];

        //SearchPost

        bool lasterr = false;
        for(int erfnd=0,pch1=upoffset+sure,pch2=downoffset+sure; erfnd<=ERR && pch1<len1 && pch2<len2 ;pch1++,pch2++)
        {                    
            char ch1 = input1[pch1];
            char ch2 = input2[pch2];
            if (ch1 == ch2) Post[erfnd]++;
            else
            {
                if (lasterr) break;
                erfnd++;
                lasterr = true;
            }
        }
        sure += Post[0];

        //SearchPre
        lasterr = true;
        for (int erfnd = 0, pch1=upoffset-2, pch2=downoffset-2; erfnd < ERR && pch1>=0 && pch2>=0; pch1--,pch2--)
        {
            char ch1 = input1[pch1];
            char ch2 = input2[pch2];
            if (ch1 == ch2) Pre[erfnd]++;
            else
            {
                if (lasterr) break;
                erfnd++;
                lasterr = true;
            }
        }

        int pr = 0;
        int po = 1;
        for (int t = 0; t < ERR; t++)
        {
            int ppd = Post[po];
            int ppr = Pre[pr];
            if ((ppd == 0) && (ppr == 0)) break;
            if (ppd > ppr) po++;
            else pr++;
        }

        int ofprim = 0;
        for (int t = 0; t < pr; t++)
        {
            ofprim += (Pre[t] + 1);
        }

        int ofdop = 0;
        for (int t = 1; t < po; t++)
        {
            ofdop += (Post[t] + 1);
        }               

        int len = sure + ofprim + ofdop;
        return new Run(len, upoffset - ofprim, downoffset - ofprim);
    }

    private static Dictionary<string, List<int>> BuildDict(string input, int len)
    {
        int fin = input.Length - len;
        var ret = new Dictionary<string, List<int>>(fin);
        
        for (int t = 0; t < fin; t++)
        {
            string str = input.Substring(t, len);
            List<int> adder;
            if (!ret.TryGetValue(str, out adder))
            {
                adder = ret[str] = new List<int>();
            }
            adder.Add(t);
        }
        return ret;
    }                        

    public class CRun
    {
        public int offset1;
        public int offset2;
        public int length=1;
        public int err = 0;
        public bool previouserr = false;                
        public CRun(int i_offset1,int i_offset2)
        {
            offset1 = i_offset1;
            offset2 = i_offset2;
        }
    }
}

Vincenzo1968 · 10-08-2008, 06:06

Quote:

Originariamente inviato da gugoXX

... sarebbe opportuno avere una funzione ben separata, che accetta in input 2 stringhe e il numero di errori, cosicche' possiamo disporre piu' o meno tutti della stessa interfaccia.
Come output, dato che e' richiesto piu' di un valore (2 offset e la lunghezza), puoi usare una struct con il C

eqque qua:

Codice:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <time.h>

#define FILE1 "dna1.txt"
#define FILE2 "dna2.txt"

//#define FILE1 "dna1b.txt"
//#define FILE2 "dna2b.txt"

//#define FILE1 "dna1c.txt"
//#define FILE2 "dna2c.txt"

#define BUFFER_SIZE 4096
#define MAX_STACK 100
#define PREFIX_SIZE 8

typedef struct tagRisultato
{
	int pos1;
	int pos2;
	char p1[1024];
	char p2[1024];
	int len;
	double tempo;
} Risultato;

typedef struct tagLista
{
	int pos;
	struct tagLista* next;
} Lista;

typedef struct tagTree
{
	char key[PREFIX_SIZE];
	Lista *pLista;
	struct tagTree *left;
	struct tagTree *right;
} Tree;

Lista* ListNewNode(int val);
Lista* ListAppend(Lista* first, int val);
void ListFree(Lista* first);

Tree *TreeNewNode(char *pKey, int pos);
Tree *TreeInsertNode(Tree *node, char * pKey, int pos);
void TreeSearch(Tree *head, Tree **result, char *pKey);
void TreeFree(Tree *head);

Lista* ListNewNode(int val)
{
	Lista *n;

	n = (Lista *)malloc(sizeof(Lista));

	if( n == NULL )
		return NULL;

	n->pos = val;
	n->next = NULL;

	return n;
}

Lista* ListAppend(Lista* first, int val)
{
	Lista *n = first, *nuovo;

	if ( first == NULL )
		return ListNewNode(val);

	n = first;
	while( n->next != NULL )
	{
		n = n->next;
	}

	nuovo = ListNewNode(val);
	n->next = nuovo;

	return first;
}

void ListFree(Lista* first)
{
	Lista *n1 = first, *n2;
	while ( n1 != NULL )
	{
		n2 = n1->next;
		free(n1);
		n1 = n2;
	}
}

Tree *TreeNewNode(char *pKey, int pos)
{
	Tree *r;
	Lista *l;

	r = (Tree *) malloc(sizeof(Tree));
	if(!r)
	{
		printf("Memoria insufficiente.\n");
		return NULL;
	}

	l = (Lista *) malloc(sizeof(Lista));
	if(!l)
	{
		printf("Memoria insufficiente.\n");
		return NULL;
	}
	l->pos = pos;
	l->next = NULL;
	

	strcpy(r->key, pKey);
	r->pLista = l;
	r->left = NULL;
	r->right = NULL;

	return r;
}

Tree *TreeInsertNode(Tree *node, char * pKey, int pos)
{
	int res;
	Tree *pRadice = NULL;

	if( !node )
	{
		node = TreeNewNode(pKey, pos);
		return node;
	}

	pRadice = node;
	
	while( 1 )
	{
		res = strcmp(pKey, node->key);
		if ( res < 0 )
		{
			if ( !node->left )
			{
				node->left = TreeNewNode(pKey, pos);
				break;
			}
			node = node->left;
		}
		else if ( res > 0 )
		{
			if ( !node->right )
			{
				node->right = TreeNewNode(pKey, pos);
				break;
			}
			node = node->right;
		}
		else
		{
			node->pLista = ListAppend(node->pLista, pos);
			break;
		}

	}

	node = pRadice;

	return node;
}

void TreeSearch(Tree *head, Tree **result, char *pKey)
{
	int res;
	Tree *node;

	*result = NULL;
	node = head;

	if ( !head )
		return;

	while( 1 )
	{
		res = strcmp(pKey, node->key);
		if ( res < 0 )
		{
			if ( !node->left )
				break;
			node = node->left;
		}
		else if ( res > 0 )
		{
			if ( !node->right )
				break;
			node = node->right;
		}
		else // key == node->data
		{
			*result = node;
			break;
		}
	}
}

void TreeFree(Tree *head)
{
	Tree *temp1, *temp2;

	Tree *stack[MAX_STACK];
	int top;

	top = 0;
 
	if ( !head )
		return;

	temp1 = temp2 = head;

	while ( temp1 != NULL )
	{
		for(; temp1->left != NULL; temp1 = temp1->left)
			stack[top++] = temp1;

		while ( (temp1 != NULL) && (temp1->right == NULL || temp1->right == temp2) )
		{
			temp2 = temp1;
			ListFree(temp2->pLista);
			free(temp2);
			if ( top == 0 )
				return;
			temp1 = stack[--top];
		}

		stack[top++] = temp1;
		temp1 = temp1->right;
	}
}

int LeggiDimensioniFile(char *szFileName)
{
	FILE *fp;
	long k;

	fp = fopen(szFileName, "rb");

	if ( fp == NULL )
		return 0;

	if ( fseek(fp, 0, SEEK_END) )
	{
		fclose(fp);
		return 0;
	}

	k = ftell(fp);

	fclose(fp);

	return k;
}

int LeggiStringa(char *szFileName, char *buffer, int dimFile)
{
	FILE *fp;

	fp = fopen(szFileName, "r");

	if ( fp == NULL )
		return 0;

	if ( fgets(buffer, dimFile+1, fp) == NULL )
	{
		printf("\nErrore nella lettura del file %s\n", szFileName);
		fclose(fp);
		return 0;
	}
	*(buffer + dimFile) = '\0';

	fclose(fp);

	return dimFile;
}

void Trova(char *szNomeFile1, char *szNomeFile2, Risultato *pRisultato)
{
	FILE *fp;
	char strTempo[512];

	Tree *pTree;
	Tree *pNode;
	Lista *pLista;
	int p1_len, p2_len;
	int k;
	int len, len_prec;
	int numErrors;
	int bTrovato;
	int MaxPrefix;

	int x;

	char strSearch[1024] = "";

	char strRes[1024] = "";
	char strTemp1[1024] = "";
	char strTemp2[1024] = "";

	clock_t c_start, c_end;

	int pos1, pos2;
	int pos1x, pos2x;

	int post1, post2;

	char *p1 = NULL;
	char *p2 = NULL;

	c_start = clock();

	p1_len = LeggiDimensioniFile(szNomeFile1);
	p2_len = LeggiDimensioniFile(szNomeFile2);

	p1 = (char*)malloc(sizeof(char)*p1_len + 1);
	if ( !p1 )
	{
		printf("Errore nell'allocazione della memoria.");
		return;
	}
	p1[0] = '\0';

	p2 = (char*)malloc(sizeof(char)*p2_len + 1);
	if ( !p2 )
	{
		printf("Errore nell'allocazione della memoria.");
		return;
	}
	p2[0] = '\0';

	if ( !LeggiStringa(szNomeFile1, p1, p1_len) )
		return;

	if ( !LeggiStringa(szNomeFile2, p2, p2_len) )
		return;

	if ( PREFIX_SIZE < p2_len )
		MaxPrefix = PREFIX_SIZE;
	else
		MaxPrefix = p2_len;

	if ( p1_len < PREFIX_SIZE )
	{
		printf("\n\nLa stringa piu' lunga risulta composta da %d caratteri.\nFai prima se ti apri i due file con blocco note e te le cerchi da solo.\nCiao ciao.\n\n", p1_len);

		pRisultato->len = len;
		pRisultato->pos1 = pos1x;
		pRisultato->pos2 = pos2x;
		strcpy(pRisultato->p1, strTemp1);
		strcpy(pRisultato->p2, strTemp2);
		pRisultato->tempo = 0;

		free(p1);
		free(p2);

		return;
	}
	
	for ( x = MaxPrefix; x > 0; x--)
	{
		pTree = NULL;
		pos1 = 0;
		while ( pos1 < p1_len - x )
		{
			memcpy(strSearch, p1 + pos1, x);
			*(strSearch + x) = '\0';

			pTree = TreeInsertNode(pTree, strSearch, pos1);

			pos1++;
		}

		pos1 = 0;
		pos2 = 0;
		strTemp1[0] = '\0';
		strTemp2[0] = '\0';
		strSearch[0] = '\0';
		pLista = NULL;
		pNode = NULL;
		len = 0;
		len_prec = 0;
		numErrors = 0;
		bTrovato = 0;

		while ( pos2 < p2_len - x )
		{
			memcpy(strSearch, p2 + pos2, x);
			*(strSearch + x) = '\0';

			TreeSearch(pTree, &pNode, strSearch);
			if ( pNode != NULL )
			{
				pLista = pNode->pLista;

				while ( pLista != NULL )
				{
					pos1 = pLista->pos;
					numErrors = 0;
					k = x;
					post1 = pos1;
					post2 = pos2;
					if ( pos2 > x && pos1 > x )
					{
						k = 0;
						while ( (*(p2 + pos2) == *(p1 + pos1)) || (numErrors < 2) )
						{
							if ( *(p2 + pos2) != *(p1 + pos1) )
								numErrors++;
							pos1--;
							pos2--;
							if ( pos2 < 0 || pos1 < 0 )
								break;
						}
						pos1 += 1;
						pos2 += 1;
						numErrors = 0;
					}
					while ( (*(p2 + pos2 + k) == *(p1 + pos1 + k)) || (numErrors < 2) )
					{
						if ( *(p2 + pos2 + k) != *(p1 + pos1 + k) )
							numErrors++;
						k++;
					}
					if ( k > len_prec )
					{
						len = k;
						pos1x = pos1;
						pos2x = pos2;
						len_prec = len;

						bTrovato = 1;
					}
					pos2 = post2;
					pLista = pLista->next;
				}
			}

			pos2++;
		}
		TreeFree(pTree);
		if ( bTrovato )
			break;
	}

	pos1 = pos1x;
	pos2 = pos2x;
	len_prec = len;
	numErrors = 0;
	while ( (*(p1 + pos1) == *(p2 + pos2)) || (numErrors < 2) )
	{
		if ( *(p2 + pos2) != *(p1 + pos1) )
			numErrors++;
		pos2++;
		pos1++;
		if ( numErrors == 2 )
			break;
	}
	len = 0;
	numErrors = 0;
	while ( (*(p1 + pos1 + len) == *(p2 + pos2 + len)) || (numErrors < 2) )
	{
		if ( *(p2 + pos2 + len) != *(p1 + pos1 + len) )
			numErrors++;
		len++;
	}
	if ( len_prec < len )
	{
		pos1x = pos1;
		pos2x = pos2;
	}
	else
	{
		len = len_prec;
	}

	memcpy(strTemp1, p1 + pos1x, len);
	*(strTemp1 + len) = '\0';

	memcpy(strTemp2, p2 + pos2x, len);
	*(strTemp2 + len) = '\0';

	sprintf(strRes,
			"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
			strTemp1,
			strTemp2,
			len,
			pos1x,
			pos2x);

	pRisultato->len = len;
	pRisultato->pos1 = pos1x;
	pRisultato->pos2 = pos2x;
	strcpy(pRisultato->p1, strTemp1);
	strcpy(pRisultato->p2, strTemp2);

	x = len_prec + 1;

	pos1 = 0;
	pos2 = pos2x + 1;
	strTemp1[0] = '\0';
	strSearch[0] = '\0';
	len_prec = len;
	len = 0;
	numErrors = 0;

	memcpy(strSearch, p2 + pos2, x);
	*(strSearch + x) = '\0';

	k = 0;
	while ( *(p1 + k) != *(p2 + k) )
		k++;
	pos1 = k;

	while ( pos1 < p1_len - x )
	{	
		memcpy(strTemp1, p1 + pos1, x);
		*(strTemp1 + x) = '\0';

		k = 0;
		while ( k < x )
		{
			if ( *(strSearch + k) != *(strTemp1 + k) )
				numErrors++;
			if ( numErrors > 2 )
				break;
			k++;
		}
		if ( k >= x && len_prec > len )
		{
			len = k;
			pos1x = pos1;
			pos2x = pos2;
			len_prec = len;

			memcpy(strTemp1, p1 + pos1x, len);
			*(strTemp1 + len) = '\0';

			memcpy(strTemp2, p2 + pos2x, len);
			*(strTemp2 + len) = '\0';

			sprintf(strRes,
					"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
					strTemp1,
					strTemp2,
					len,
					pos1x,
					pos2x);

			pRisultato->len = len;
			pRisultato->pos1 = pos1x;
			pRisultato->pos2 = pos2x;
			strcpy(pRisultato->p1, strTemp1);
			strcpy(pRisultato->p2, strTemp2);

			break;
		}

		pos1++;
	}

	c_end = clock();

	sprintf(strTempo, "\nTempo impiegato -> %5.5f secondi\n", (double)(c_end - c_start) / CLOCKS_PER_SEC);

	pRisultato->tempo = (double)(c_end - c_start) / CLOCKS_PER_SEC;

	fp = fopen("Risultato.txt", "a");
	fwrite(strRes, strlen(strRes), 1, fp);
	fwrite(strTempo, strlen(strTempo), 1, fp);
	fclose(fp);

	free(p1);
	free(p2);
}

int main()
{
	Risultato ris;
	char strTempo[512];
	char strRes[1024];

	Trova(FILE1, FILE2, &ris);

	sprintf(strRes,
			"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
			ris.p1,
			ris.p2,
			ris.len,
			ris.pos1,
			ris.pos2);

	printf(strRes);

	sprintf(strTempo, "\nTempo impiegato -> %5.5f secondi\n", ris.tempo);

	printf(strTempo);

	return 0;
}

Vincenzo1968 · 10-08-2008, 06:12

Quote:

Originariamente inviato da gugoXX

Rilancio di nuovo

Con questo codice.

Codice:

static void Main(string[] args)
{
    string input1 ="";
    string input2 = "";
   
    input1 = File.ReadAllText(@"C:\temp\DNA1.txt");
    input2 = File.ReadAllText(@"C:\temp\DNA2.txt");
                
    Stopwatch sw = new Stopwatch();
    sw.Start();            
    Run Current = TryV5.Get(input1, input2, 2);
    sw.Stop();

...

Un momento! Io, nel tempo finale, ho incluso i tempi di lettura delle stringhe dai file.

Ho tentato di compilare il tuo codice ma ho questo errore:

Codice:

Errore	1	Impossibile trovare il tipo o il nome dello spazio dei nomi 'Run'; probabilmente manca una direttiva using o un riferimento a un assembly	C:\Progetti CSharp\Gugo\Gugo\Gugo\Program.cs	17	23	Gugo

cionci · 10-08-2008, 11:12

Quote:

Originariamente inviato da Vincenzo1968

eqque qua:

Continua a darmi soluzioni strane:
Stringhe

CCACTGTCCTGTCAACAAGGAGT
GGACTGTCCTGTCAACAAGGAGT

di lunghezza 23 trovate alle posizioni 10000 e 40000

Tempo impiegato -> 0.18000 secondi

Secondo me c'è qualche memory leak, altrimenti mi sembra strano che dia soluzioni diverse con compilatori diversi.

Vincenzo1968 · 10-08-2008, 17:03

Quote:

Originariamente inviato da cionci

Continua a darmi soluzioni strane:
Stringhe

CCACTGTCCTGTCAACAAGGAGT
GGACTGTCCTGTCAACAAGGAGT

di lunghezza 23 trovate alle posizioni 10000 e 40000

Tempo impiegato -> 0.18000 secondi

Secondo me c'è qualche memory leak, altrimenti mi sembra strano che dia soluzioni diverse con compilatori diversi.

Vero è

Col visual c++ il risultato è corretto, mentre col watcom(che ho scaricato e installato appositamente), il risultato è sbagliato.

Io mi arrendo. Passo il resto delle ferie lontano dal computer. Per il punto A la mia soluzione è quella con i suffix tree che ho postato all'inizio.

A rileggervi a settembre.
Buone vacanze a tutti

Vincenzo1968 · 12-08-2008, 01:30

Quote:

Originariamente inviato da Vincenzo1968

Vero è

Col visual c++ il risultato è corretto, mentre col watcom(che ho scaricato e installato appositamente), il risultato è sbagliato.

Io mi arrendo. Passo il resto delle ferie lontano dal computer. Per il punto A la mia soluzione è quella con i suffix tree che ho postato all'inizio.

A rileggervi a settembre.
Buone vacanze a tutti

Ho trovato l'errore.

Questa linea(il campo key dell'albero binario):

Codice:

...
	char key[PREFIX_SIZE];
...

va sostituita con questa:

Codice:

...
	char key[PREFIX_SIZE + 1];
...

Funziona sia col visual studio che col watcom.
Tempo impiegato -> 0.19400 secondi

Vincenzo1968 · 12-08-2008, 02:16

Ho adattato il codice per il punto A da quello per il punto B. Posto le versioni per i due punti:

Punto A:

Codice:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <time.h>

#define FILE1 "dna1.txt"
#define FILE2 "dna2.txt"

//#define FILE1 "dna1b.txt"
//#define FILE2 "dna2b.txt"

//#define FILE1 "dna1c.txt"
//#define FILE2 "dna2c.txt"

#define BUFFER_SIZE 4096
#define MAX_STACK 100
#define PREFIX_SIZE 8

typedef struct tagRisultato
{
	int pos1;
	int pos2;
	char p1[1024];
	char p2[1024];
	int len;
	double tempo;
} Risultato;

typedef struct tagLista
{
	int pos;
	struct tagLista* next;
} Lista;

typedef struct tagTree
{
	char key[PREFIX_SIZE + 1];
	Lista *pLista;
	struct tagTree *left;
	struct tagTree *right;
} Tree;

Lista* ListNewNode(int val);
Lista* ListAppend(Lista* first, int val);
void ListFree(Lista* first);

Tree *TreeNewNode(char *pKey, int pos);
Tree *TreeInsertNode(Tree *node, char * pKey, int pos);
void TreeSearch(Tree *head, Tree **result, char *pKey);
void TreeFree(Tree *head);

Lista* ListNewNode(int val)
{
	Lista *n;

	n = (Lista *)malloc(sizeof(Lista));

	if( n == NULL )
		return NULL;

	n->pos = val;
	n->next = NULL;

	return n;
}

Lista* ListAppend(Lista* first, int val)
{
	Lista *n = first, *nuovo;

	if ( first == NULL )
		return ListNewNode(val);

	n = first;
	while( n->next != NULL )
	{
		n = n->next;
	}

	nuovo = ListNewNode(val);
	n->next = nuovo;

	return first;
}

void ListFree(Lista* first)
{
	Lista *n1 = first, *n2;
	while ( n1 != NULL )
	{
		n2 = n1->next;
		free(n1);
		n1 = n2;
	}
}

Tree *TreeNewNode(char *pKey, int pos)
{
	Tree *r;
	Lista *l;

	r = (Tree *) malloc(sizeof(Tree));
	if(!r)
	{
		printf("Memoria insufficiente.\n");
		return NULL;
	}

	l = (Lista *) malloc(sizeof(Lista));
	if(!l)
	{
		printf("Memoria insufficiente.\n");
		return NULL;
	}
	l->pos = pos;
	l->next = NULL;
	

	strcpy(r->key, pKey);
	r->pLista = l;
	r->left = NULL;
	r->right = NULL;

	return r;
}

Tree *TreeInsertNode(Tree *node, char * pKey, int pos)
{
	int res;
	Tree *pRadice = NULL;

	if( !node )
	{
		node = TreeNewNode(pKey, pos);
		return node;
	}

	pRadice = node;
	
	while( 1 )
	{
		res = strcmp(pKey, node->key);
		if ( res < 0 )
		{
			if ( !node->left )
			{
				node->left = TreeNewNode(pKey, pos);
				break;
			}
			node = node->left;
		}
		else if ( res > 0 )
		{
			if ( !node->right )
			{
				node->right = TreeNewNode(pKey, pos);
				break;
			}
			node = node->right;
		}
		else
		{
			node->pLista = ListAppend(node->pLista, pos);
			break;
		}

	}

	node = pRadice;

	return node;
}

void TreeSearch(Tree *head, Tree **result, char *pKey)
{
	int res;
	Tree *node;

	*result = NULL;
	node = head;

	if ( !head )
		return;

	while( 1 )
	{
		res = strcmp(pKey, node->key);
		if ( res < 0 )
		{
			if ( !node->left )
				break;
			node = node->left;
		}
		else if ( res > 0 )
		{
			if ( !node->right )
				break;
			node = node->right;
		}
		else // key == node->data
		{
			*result = node;
			break;
		}
	}
}

void TreeFree(Tree *head)
{
	Tree *temp1, *temp2;

	Tree *stack[MAX_STACK];
	int top;

	top = 0;
 
	if ( !head )
		return;

	temp1 = temp2 = head;

	while ( temp1 != NULL )
	{
		for(; temp1->left != NULL; temp1 = temp1->left)
			stack[top++] = temp1;

		while ( (temp1 != NULL) && (temp1->right == NULL || temp1->right == temp2) )
		{
			temp2 = temp1;
			ListFree(temp2->pLista);
			free(temp2);
			if ( top == 0 )
				return;
			temp1 = stack[--top];
		}

		stack[top++] = temp1;
		temp1 = temp1->right;
	}
}

int LeggiDimensioniFile(char *szFileName)
{
	FILE *fp;
	long k;

	fp = fopen(szFileName, "rb");

	if ( fp == NULL )
		return 0;

	if ( fseek(fp, 0, SEEK_END) )
	{
		fclose(fp);
		return 0;
	}

	k = ftell(fp);

	fclose(fp);

	return k;
}

int LeggiStringa(char *szFileName, char *buffer, int dimFile)
{
	FILE *fp;

	fp = fopen(szFileName, "r");

	if ( fp == NULL )
		return 0;

	if ( fgets(buffer, dimFile+1, fp) == NULL )
	{
		printf("\nErrore nella lettura del file %s\n", szFileName);
		fclose(fp);
		return 0;
	}
	*(buffer + dimFile) = '\0';

	fclose(fp);

	return dimFile;
}

void Trova(char *szNomeFile1, char *szNomeFile2, Risultato *pRisultato)
{
	FILE *fp;
	char strTempo[512];

	Tree *pTree;
	Tree *pNode;
	Lista *pLista;
	int p1_len, p2_len;
	int k;
	int len, len_prec;
	int bTrovato;
	int MaxPrefix;

	int x;

	char strSearch[1024] = "";

	char strRes[1024] = "";
	char strTemp1[1024] = "";
	char strTemp2[1024] = "";

	clock_t c_start, c_end;

	int pos1, pos2;
	int pos1x, pos2x;

	int post1, post2;

	char *p1 = NULL;
	char *p2 = NULL;

	c_start = clock();

	p1_len = LeggiDimensioniFile(szNomeFile1);
	p2_len = LeggiDimensioniFile(szNomeFile2);

	p1 = (char*)malloc(sizeof(char)*p1_len + 1);
	if ( !p1 )
	{
		printf("Errore nell'allocazione della memoria.");
		return;
	}
	p1[0] = '\0';

	p2 = (char*)malloc(sizeof(char)*p2_len + 1);
	if ( !p2 )
	{
		printf("Errore nell'allocazione della memoria.");
		return;
	}
	p2[0] = '\0';

	if ( !LeggiStringa(szNomeFile1, p1, p1_len) )
		return;

	if ( !LeggiStringa(szNomeFile2, p2, p2_len) )
		return;

	if ( PREFIX_SIZE < p2_len )
		MaxPrefix = PREFIX_SIZE;
	else
		MaxPrefix = p2_len;

	if ( p1_len < PREFIX_SIZE )
	{
		printf("\n\nLa stringa piu' lunga risulta composta da %d caratteri.\nFai prima se ti apri i due file con blocco note e te le cerchi da solo.\nCiao ciao.\n\n", p1_len);

		pRisultato->len = 0;
		pRisultato->pos1 = 0;
		pRisultato->pos2 = 0;
		strcpy(pRisultato->p1, strTemp1);
		strcpy(pRisultato->p2, strTemp2);
		pRisultato->tempo = 0;

		free(p1);
		free(p2);

		return;
	}
	
	for ( x = MaxPrefix; x > 0; x--)
	{
		pTree = NULL;
		pos1 = 0;
		while ( pos1 < p1_len - x )
		{
			memcpy(strSearch, p1 + pos1, x);
			*(strSearch + x) = '\0';

			pTree = TreeInsertNode(pTree, strSearch, pos1);

			pos1++;
		}

		pos1 = 0;
		pos2 = 0;
		strTemp1[0] = '\0';
		strTemp2[0] = '\0';
		strSearch[0] = '\0';
		pLista = NULL;
		pNode = NULL;
		len = 0;
		len_prec = 0;
		bTrovato = 0;

		while ( pos2 < p2_len - x )
		{
			memcpy(strSearch, p2 + pos2, x);
			*(strSearch + x) = '\0';

			TreeSearch(pTree, &pNode, strSearch);
			if ( pNode != NULL )
			{
				pLista = pNode->pLista;

				while ( pLista != NULL )
				{
					pos1 = pLista->pos;
					k = x;
					post1 = pos1;
					post2 = pos2;
					if ( pos2 > x && pos1 > x )
					{
						k = 0;
						while ( *(p2 + pos2) == *(p1 + pos1) )
						{
							pos1--;
							pos2--;
							if ( pos2 < 0 || pos1 < 0 )
								break;
						}
						pos1 += 1;
						pos2 += 1;
					}
					while ( *(p2 + pos2 + k) == *(p1 + pos1 + k) )
						k++;
					if ( k > len_prec )
					{
						len = k;
						pos1x = pos1;
						pos2x = pos2;
						len_prec = len;
						bTrovato = 1;
					}
					pos2 = post2;
					pLista = pLista->next;
				}
			}

			pos2++;
		}
		TreeFree(pTree);
		if ( bTrovato )
			break;
	}

	pos1 = pos1x;
	pos2 = pos2x;
	len_prec = len;
	while ( *(p1 + pos1) == *(p2 + pos2) )
	{
		pos2++;
		pos1++;
	}
	len = 0;
	while ( *(p1 + pos1 + len) == *(p2 + pos2 + len) )
		len++;
	if ( len_prec < len )
	{
		pos1x = pos1;
		pos2x = pos2;
	}
	else
	{
		len = len_prec;
	}

	memcpy(strTemp1, p1 + pos1x, len);
	*(strTemp1 + len) = '\0';

	memcpy(strTemp2, p2 + pos2x, len);
	*(strTemp2 + len) = '\0';

	sprintf(strRes,
			"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
			strTemp1,
			strTemp2,
			len,
			pos1x,
			pos2x);

	pRisultato->len = len;
	pRisultato->pos1 = pos1x;
	pRisultato->pos2 = pos2x;
	strcpy(pRisultato->p1, strTemp1);
	strcpy(pRisultato->p2, strTemp2);

	x = len_prec + 1;

	pos1 = 0;
	pos2 = pos2x + 1;
	strTemp1[0] = '\0';
	strSearch[0] = '\0';
	len_prec = len;
	len = 0;

	memcpy(strSearch, p2 + pos2, x);
	*(strSearch + x) = '\0';

	k = 0;
	while ( *(p1 + k) != *(p2 + k) )
		k++;
	pos1 = k;

	while ( pos1 < p1_len - x )
	{	
		memcpy(strTemp1, p1 + pos1, x);
		*(strTemp1 + x) = '\0';

		k = 0;
		/*
		while ( k < x )
		{
			if ( *(strSearch + k) != *(strTemp1 + k) )
				numErrors++;
			if ( numErrors > 2 )
				break;
			k++;
		}
		*/
		if ( k >= x && len_prec > len )
		{
			len = k;
			pos1x = pos1;
			pos2x = pos2;
			len_prec = len;

			memcpy(strTemp1, p1 + pos1x, len);
			*(strTemp1 + len) = '\0';

			memcpy(strTemp2, p2 + pos2x, len);
			*(strTemp2 + len) = '\0';

			sprintf(strRes,
					"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
					strTemp1,
					strTemp2,
					len,
					pos1x,
					pos2x);

			pRisultato->len = len;
			pRisultato->pos1 = pos1x;
			pRisultato->pos2 = pos2x;
			strcpy(pRisultato->p1, strTemp1);
			strcpy(pRisultato->p2, strTemp2);

			break;
		}

		pos1++;
	}

	c_end = clock();

	sprintf(strTempo, "\nTempo impiegato -> %5.5f secondi\n", (double)(c_end - c_start) / CLOCKS_PER_SEC);

	pRisultato->tempo = (double)(c_end - c_start) / CLOCKS_PER_SEC;

	fp = fopen("Risultato.txt", "a");
	fwrite(strRes, strlen(strRes), 1, fp);
	fwrite(strTempo, strlen(strTempo), 1, fp);
	fclose(fp);

	free(p1);
	free(p2);
}

int main()
{
	Risultato ris;
	char strTempo[512];
	char strRes[1024];

	Trova(FILE1, FILE2, &ris);

	sprintf(strRes,
			"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
			ris.p1,
			ris.p2,
			ris.len,
			ris.pos1,
			ris.pos2);

	printf(strRes);

	sprintf(strTempo, "\nTempo impiegato -> %5.5f secondi\n", ris.tempo);

	printf(strTempo);

	return 0;
}

Punto B:

Codice:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <time.h>

#define FILE1 "dna1.txt"
#define FILE2 "dna2.txt"

//#define FILE1 "dna1b.txt"
//#define FILE2 "dna2b.txt"

//#define FILE1 "dna1c.txt"
//#define FILE2 "dna2c.txt"

#define BUFFER_SIZE 4096
#define MAX_STACK 100
#define PREFIX_SIZE 8

typedef struct tagRisultato
{
	int pos1;
	int pos2;
	char p1[1024];
	char p2[1024];
	int len;
	double tempo;
} Risultato;

typedef struct tagLista
{
	int pos;
	struct tagLista* next;
} Lista;

typedef struct tagTree
{
	char key[PREFIX_SIZE + 1];
	Lista *pLista;
	struct tagTree *left;
	struct tagTree *right;
} Tree;

Lista* ListNewNode(int val);
Lista* ListAppend(Lista* first, int val);
void ListFree(Lista* first);

Tree *TreeNewNode(char *pKey, int pos);
Tree *TreeInsertNode(Tree *node, char * pKey, int pos);
void TreeSearch(Tree *head, Tree **result, char *pKey);
void TreeFree(Tree *head);

Lista* ListNewNode(int val)
{
	Lista *n;

	n = (Lista *)malloc(sizeof(Lista));

	if( n == NULL )
		return NULL;

	n->pos = val;
	n->next = NULL;

	return n;
}

Lista* ListAppend(Lista* first, int val)
{
	Lista *n = first, *nuovo;

	if ( first == NULL )
		return ListNewNode(val);

	n = first;
	while( n->next != NULL )
	{
		n = n->next;
	}

	nuovo = ListNewNode(val);
	n->next = nuovo;

	return first;
}

void ListFree(Lista* first)
{
	Lista *n1 = first, *n2;
	while ( n1 != NULL )
	{
		n2 = n1->next;
		free(n1);
		n1 = n2;
	}
}

Tree *TreeNewNode(char *pKey, int pos)
{
	Tree *r;
	Lista *l;

	r = (Tree *) malloc(sizeof(Tree));
	if(!r)
	{
		printf("Memoria insufficiente.\n");
		return NULL;
	}

	l = (Lista *) malloc(sizeof(Lista));
	if(!l)
	{
		printf("Memoria insufficiente.\n");
		return NULL;
	}
	l->pos = pos;
	l->next = NULL;
	

	strcpy(r->key, pKey);
	r->pLista = l;
	r->left = NULL;
	r->right = NULL;

	return r;
}

Tree *TreeInsertNode(Tree *node, char * pKey, int pos)
{
	int res;
	Tree *pRadice = NULL;

	if( !node )
	{
		node = TreeNewNode(pKey, pos);
		return node;
	}

	pRadice = node;
	
	while( 1 )
	{
		res = strcmp(pKey, node->key);
		if ( res < 0 )
		{
			if ( !node->left )
			{
				node->left = TreeNewNode(pKey, pos);
				break;
			}
			node = node->left;
		}
		else if ( res > 0 )
		{
			if ( !node->right )
			{
				node->right = TreeNewNode(pKey, pos);
				break;
			}
			node = node->right;
		}
		else
		{
			node->pLista = ListAppend(node->pLista, pos);
			break;
		}

	}

	node = pRadice;

	return node;
}

void TreeSearch(Tree *head, Tree **result, char *pKey)
{
	int res;
	Tree *node;

	*result = NULL;
	node = head;

	if ( !head )
		return;

	while( 1 )
	{
		res = strcmp(pKey, node->key);
		if ( res < 0 )
		{
			if ( !node->left )
				break;
			node = node->left;
		}
		else if ( res > 0 )
		{
			if ( !node->right )
				break;
			node = node->right;
		}
		else // key == node->data
		{
			*result = node;
			break;
		}
	}
}

void TreeFree(Tree *head)
{
	Tree *temp1, *temp2;

	Tree *stack[MAX_STACK];
	int top;

	top = 0;
 
	if ( !head )
		return;

	temp1 = temp2 = head;

	while ( temp1 != NULL )
	{
		for(; temp1->left != NULL; temp1 = temp1->left)
			stack[top++] = temp1;

		while ( (temp1 != NULL) && (temp1->right == NULL || temp1->right == temp2) )
		{
			temp2 = temp1;
			ListFree(temp2->pLista);
			free(temp2);
			if ( top == 0 )
				return;
			temp1 = stack[--top];
		}

		stack[top++] = temp1;
		temp1 = temp1->right;
	}
}

int LeggiDimensioniFile(char *szFileName)
{
	FILE *fp;
	long k;

	fp = fopen(szFileName, "rb");

	if ( fp == NULL )
		return 0;

	if ( fseek(fp, 0, SEEK_END) )
	{
		fclose(fp);
		return 0;
	}

	k = ftell(fp);

	fclose(fp);

	return k;
}

int LeggiStringa(char *szFileName, char *buffer, int dimFile)
{
	FILE *fp;

	fp = fopen(szFileName, "r");

	if ( fp == NULL )
		return 0;

	if ( fgets(buffer, dimFile+1, fp) == NULL )
	{
		printf("\nErrore nella lettura del file %s\n", szFileName);
		fclose(fp);
		return 0;
	}
	*(buffer + dimFile) = '\0';

	fclose(fp);

	return dimFile;
}

void Trova(char *szNomeFile1, char *szNomeFile2, Risultato *pRisultato)
{
	FILE *fp;
	char strTempo[512];

	Tree *pTree;
	Tree *pNode;
	Lista *pLista;
	int p1_len, p2_len;
	int k;
	int len, len_prec;
	int numErrors;
	int bTrovato;
	int MaxPrefix;

	int x;

	char strSearch[1024] = "";

	char strRes[1024] = "";
	char strTemp1[1024] = "";
	char strTemp2[1024] = "";

	clock_t c_start, c_end;

	int pos1, pos2;
	int pos1x, pos2x;

	int post1, post2;

	char *p1 = NULL;
	char *p2 = NULL;

	c_start = clock();

	p1_len = LeggiDimensioniFile(szNomeFile1);
	p2_len = LeggiDimensioniFile(szNomeFile2);

	p1 = (char*)malloc(sizeof(char)*p1_len + 1);
	if ( !p1 )
	{
		printf("Errore nell'allocazione della memoria.");
		return;
	}
	p1[0] = '\0';

	p2 = (char*)malloc(sizeof(char)*p2_len + 1);
	if ( !p2 )
	{
		printf("Errore nell'allocazione della memoria.");
		return;
	}
	p2[0] = '\0';

	if ( !LeggiStringa(szNomeFile1, p1, p1_len) )
		return;

	if ( !LeggiStringa(szNomeFile2, p2, p2_len) )
		return;

	if ( PREFIX_SIZE < p2_len )
		MaxPrefix = PREFIX_SIZE;
	else
		MaxPrefix = p2_len;

	if ( p1_len < PREFIX_SIZE )
	{
		printf("\n\nLa stringa piu' lunga risulta composta da %d caratteri.\nFai prima se ti apri i due file con blocco note e te le cerchi da solo.\nCiao ciao.\n\n", p1_len);

		pRisultato->len = 0;
		pRisultato->pos1 = 0;
		pRisultato->pos2 = 0;
		strcpy(pRisultato->p1, strTemp1);
		strcpy(pRisultato->p2, strTemp2);
		pRisultato->tempo = 0;

		free(p1);
		free(p2);

		return;
	}
	
	for ( x = MaxPrefix; x > 0; x--)
	{
		pTree = NULL;
		pos1 = 0;
		while ( pos1 < p1_len - x )
		{
			memcpy(strSearch, p1 + pos1, x);
			*(strSearch + x) = '\0';

			pTree = TreeInsertNode(pTree, strSearch, pos1);

			pos1++;
		}

		pos1 = 0;
		pos2 = 0;
		strTemp1[0] = '\0';
		strTemp2[0] = '\0';
		strSearch[0] = '\0';
		pLista = NULL;
		pNode = NULL;
		len = 0;
		len_prec = 0;
		numErrors = 0;
		bTrovato = 0;

		while ( pos2 < p2_len - x )
		{
			memcpy(strSearch, p2 + pos2, x);
			*(strSearch + x) = '\0';

			TreeSearch(pTree, &pNode, strSearch);
			if ( pNode != NULL )
			{
				pLista = pNode->pLista;

				while ( pLista != NULL )
				{
					pos1 = pLista->pos;
					numErrors = 0;
					k = x;
					post1 = pos1;
					post2 = pos2;
					if ( pos2 > x && pos1 > x )
					{
						k = 0;
						while ( (*(p2 + pos2) == *(p1 + pos1)) || (numErrors < 2) )
						{
							if ( *(p2 + pos2) != *(p1 + pos1) )
								numErrors++;
							pos1--;
							pos2--;
							if ( pos2 < 0 || pos1 < 0 )
								break;
						}
						pos1 += 1;
						pos2 += 1;
						numErrors = 0;
					}
					while ( (*(p2 + pos2 + k) == *(p1 + pos1 + k)) || (numErrors < 2) )
					{
						if ( *(p2 + pos2 + k) != *(p1 + pos1 + k) )
							numErrors++;
						k++;
					}
					if ( k > len_prec )
					{
						len = k;
						pos1x = pos1;
						pos2x = pos2;
						len_prec = len;

						bTrovato = 1;
					}
					pos2 = post2;
					pLista = pLista->next;
				}
			}

			pos2++;
		}
		TreeFree(pTree);
		if ( bTrovato )
			break;
	}

	pos1 = pos1x;
	pos2 = pos2x;
	len_prec = len;
	numErrors = 0;
	while ( (*(p1 + pos1) == *(p2 + pos2)) || (numErrors < 2) )
	{
		if ( *(p2 + pos2) != *(p1 + pos1) )
			numErrors++;
		pos2++;
		pos1++;
		if ( numErrors == 2 )
			break;
	}
	len = 0;
	numErrors = 0;
	while ( (*(p1 + pos1 + len) == *(p2 + pos2 + len)) || (numErrors < 2) )
	{
		if ( *(p2 + pos2 + len) != *(p1 + pos1 + len) )
			numErrors++;
		len++;
	}
	if ( len_prec < len )
	{
		pos1x = pos1;
		pos2x = pos2;
	}
	else
	{
		len = len_prec;
	}

	memcpy(strTemp1, p1 + pos1x, len);
	*(strTemp1 + len) = '\0';

	memcpy(strTemp2, p2 + pos2x, len);
	*(strTemp2 + len) = '\0';

	sprintf(strRes,
			"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
			strTemp1,
			strTemp2,
			len,
			pos1x,
			pos2x);

	pRisultato->len = len;
	pRisultato->pos1 = pos1x;
	pRisultato->pos2 = pos2x;
	strcpy(pRisultato->p1, strTemp1);
	strcpy(pRisultato->p2, strTemp2);

	x = len_prec + 1;

	pos1 = 0;
	pos2 = pos2x + 1;
	strTemp1[0] = '\0';
	strSearch[0] = '\0';
	len_prec = len;
	len = 0;
	numErrors = 0;

	memcpy(strSearch, p2 + pos2, x);
	*(strSearch + x) = '\0';

	k = 0;
	while ( *(p1 + k) != *(p2 + k) )
		k++;
	pos1 = k;

	while ( pos1 < p1_len - x )
	{	
		memcpy(strTemp1, p1 + pos1, x);
		*(strTemp1 + x) = '\0';

		k = 0;
		while ( k < x )
		{
			if ( *(strSearch + k) != *(strTemp1 + k) )
				numErrors++;
			if ( numErrors > 2 )
				break;
			k++;
		}
		if ( k >= x && len_prec > len )
		{
			len = k;
			pos1x = pos1;
			pos2x = pos2;
			len_prec = len;

			memcpy(strTemp1, p1 + pos1x, len);
			*(strTemp1 + len) = '\0';

			memcpy(strTemp2, p2 + pos2x, len);
			*(strTemp2 + len) = '\0';

			sprintf(strRes,
					"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
					strTemp1,
					strTemp2,
					len,
					pos1x,
					pos2x);

			pRisultato->len = len;
			pRisultato->pos1 = pos1x;
			pRisultato->pos2 = pos2x;
			strcpy(pRisultato->p1, strTemp1);
			strcpy(pRisultato->p2, strTemp2);

			break;
		}

		pos1++;
	}

	c_end = clock();

	sprintf(strTempo, "\nTempo impiegato -> %5.5f secondi\n", (double)(c_end - c_start) / CLOCKS_PER_SEC);

	pRisultato->tempo = (double)(c_end - c_start) / CLOCKS_PER_SEC;

	fp = fopen("Risultato.txt", "a");
	fwrite(strRes, strlen(strRes), 1, fp);
	fwrite(strTempo, strlen(strTempo), 1, fp);
	fclose(fp);

	free(p1);
	free(p2);
}

int main()
{
	Risultato ris;
	char strTempo[512];
	char strRes[1024];

	Trova(FILE1, FILE2, &ris);

	sprintf(strRes,
			"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
			ris.p1,
			ris.p2,
			ris.len,
			ris.pos1,
			ris.pos2);

	printf(strRes);

	sprintf(strTempo, "\nTempo impiegato -> %5.5f secondi\n", ris.tempo);

	printf(strTempo);

	return 0;
}

In entrambi i casi, il tempo ottenuto, sulla mia macchina, è di circa 0.2 secondi

Vincenzo1968 · 12-08-2008, 03:55

Ho effettuato parecchie prove con entrambi i compilatori, con stringhe diverse e in diverse posizioni, e sembrerebbe tutto a posto.

Per esempio, per il punto B:

Cionci, ti sarei grato se volessi confermare i risultati anche col tuo compilatore(se non ricordo male usi gcc in ambiente Unix/Linux?).

gugoXX · 12-08-2008, 08:56

Metto anche io il codice completo

Quote:

Maximum common length 29
Maximum substring indexes 50002,70002
Substring1: ACTGTCCTGAAGATCGCTTGGCATCTCCG
Substring2: ACTGTCCTGCAGATCGCTTTGCATCTCCG
259ms

Codice:

public class Program
{
    static void Main(string[] args)
    {
        string input1 = "";
        string input2 = "";

        input1 = File.ReadAllText(@"C:\temp\DNA1.txt");
        input2 = File.ReadAllText(@"C:\temp\DNA2.txt");

        //Randomizer rnd = new Randomizer();
        //rnd.SetRandomTest();
        //input1 = rnd.input1;
        //input2 = rnd.input2;

        Stopwatch sw = new Stopwatch();
        sw.Start();
        Run Current = TryV5.Get(input1, input2, 2);
        sw.Stop();

        Console.ForegroundColor = ConsoleColor.White;
        Console.WriteLine("Maximum common length {0}", Current.length);
        Console.WriteLine("Maximum substring indexes {0},{1}", Current.index0, Current.index1);

        string s0 = input1.Substring(Current.index0, Current.length);
        string s1 = input2.Substring(Current.index1, Current.length);

        Console.Write("Substring1: ");
        for (int t = 0; t < Current.length; t++)
        {
            if (s0[t] == s1[t]) Console.ForegroundColor = ConsoleColor.White;
            else Console.ForegroundColor = ConsoleColor.Red;
            Console.Write(s0[t]);
        }
        Console.WriteLine();
        Console.Write("Substring2: ");
        for (int t = 0; t < Current.length; t++)
        {
            if (s0[t] == s1[t]) Console.ForegroundColor = ConsoleColor.White;
            else Console.ForegroundColor = ConsoleColor.Red;
            Console.Write(s1[t]);
        }
        Console.WriteLine();

        Console.WriteLine("{0}ms", sw.ElapsedMilliseconds);
        Console.ReadKey();
    }
}

public class TryV5
{
    static string input1;
    static string input2;
    static int len1;
    static int len2;
    static int ERR;

    public static Run Get(string i_input1, string i_input2, int i_ERR)
    {
        input1 = i_input1;
        input2 = i_input2;
        if (input2.Length > input1.Length)
        {
            string tmp = input1;
            input1 = input2;
            input2 = tmp;
        }

        ERR = i_ERR;

        len1 = input1.Length;
        len2 = input2.Length;

        var Upper = new Dictionary<int, Dictionary<string, List<int>>>(100);
        var Lower = new Dictionary<int, Dictionary<string, List<int>>>(100);

        int length = (int)(Math.Log(len1, 4));
        //int length = 6;
        bool stay;
        do
        {
            length++;

            Dictionary<string, List<int>> mup, mdw;
            Get2Dicts(length, out mup, out mdw);
            Upper[length] = mup;
            Lower[length] = mdw;

            var tst = from m in mup
                      join n in mdw on m.Key equals n.Key
                      select true;

            int lntest = Math.Min(mup.Count, 1 << 19);
            int take = tst.Take(lntest).Count();
            stay = (take == lntest);
            if (take == 0) length -= 2;
        } while (stay);

        Run Winner = new Run();
        int minlen = GetMinLen(length);

        for (int ln = length; ln >= minlen; ln--)
        {
            Dictionary<string, List<int>> mup;
            Dictionary<string, List<int>> mdw;
            if (Upper.ContainsKey(ln))
            {
                mup = Upper[ln];
                mdw = Lower[ln];
            }
            else
            {
                Get2Dicts(ln, out mup, out mdw);
            }

            // Parallelismo qui
            var tst = from m in mup
                      join n in mdw on m.Key equals n.Key
                      from u in m.Value
                      from d in n.Value
                      select Search(u, d, ln);

            Run best = tst.Max();
            if (best.length > Winner.length)
            {
                Winner = best;
                minlen = GetMinLen(best.length);
                if (minlen > ln) break;
            }
        }
        return Winner;
    }

    public static void Get2Dicts(int length, out Dictionary<string, List<int>> mup, out Dictionary<string, List<int>> mdw)
    {
        // Inserimento parallelismo qui
        mup = BuildDict(input1, length);
        mdw = BuildDict(input2, length);
    }

    public static int GetMinLen(int curmaxlen)
    {
        int minlen = (int)Math.Ceiling((float)(curmaxlen) / (float)(ERR + 1));
        return minlen;
    }

    private static Run Search(int upoffset, int downoffset, int sure)
    {
        int[] Pre = new int[ERR + 1];
        int[] Post = new int[ERR + 2];

        //SearchPost

        bool lasterr = false;
        for (int erfnd = 0, pch1 = upoffset + sure, pch2 = downoffset + sure; erfnd <= ERR && pch1 < len1 && pch2 < len2; pch1++, pch2++)
        {
            char ch1 = input1[pch1];
            char ch2 = input2[pch2];
            if (ch1 == ch2)
            {
                Post[erfnd]++;
                lasterr = false;
            }
            else
            {
                if (lasterr) break;
                erfnd++;
                lasterr = true;
            }
        }
        sure += Post[0];

        //SearchPre
        lasterr = true;
        for (int erfnd = 0, pch1 = upoffset - 2, pch2 = downoffset - 2; erfnd < ERR && pch1 >= 0 && pch2 >= 0; pch1--, pch2--)
        {
            char ch1 = input1[pch1];
            char ch2 = input2[pch2];
            if (ch1 == ch2)
            {
                Pre[erfnd]++;
                lasterr = false;
            }
            else
            {
                if (lasterr) break;
                erfnd++;
                lasterr = true;
            }
        }

        int pr = 0;
        int po = 1;
        for (int t = 0; t < ERR; t++)
        {
            int ppd = Post[po];
            int ppr = Pre[pr];
            if ((ppd == 0) && (ppr == 0)) break;
            if (ppd > ppr) po++;
            else pr++;
        }

        int ofprim = 0;
        for (int t = 0; t < pr; t++)
        {
            ofprim += (Pre[t] + 1);
        }

        int ofdop = 0;
        for (int t = 1; t < po; t++)
        {
            ofdop += (Post[t] + 1);
        }

        int len = sure + ofprim + ofdop;
        return new Run(len, upoffset - ofprim, downoffset - ofprim);
    }

    private static Dictionary<string, List<int>> BuildDict(string input, int len)
    {
        int fin = input.Length - len;
        var ret = new Dictionary<string, List<int>>(fin);

        for (int t = 0; t < fin; t++)
        {
            string str = input.Substring(t, len);
            List<int> adder;
            if (!ret.TryGetValue(str, out adder))
            {
                adder = ret[str] = new List<int>();
            }
            adder.Add(t);
        }
        return ret;
    }
}

public class Run : IComparable<Run>
{
    public int index0;
    public int index1;
    public int length;
    public Run()
    {
        index0 = int.MinValue;
        index1 = int.MinValue;
        length = int.MinValue;
    }

    public Run(int len, int i0, int i1)
    {
        length = len;
        index0 = i0;
        index1 = i1;
    }

    #region IComparable<Run> Members

    public int CompareTo(Run other)
    {
        return length - other.length;
    }

    #endregion
}

Vincenzo1968 · 12-08-2008, 16:08

Ciao Gugo,

sulla mia macchina risulta leggermente più lento:

e ho trovato qualche piccola incongruenza. Per esempio, se nei due file mettiamo le stringhe indicate da cionci:

CCACTGCTGTGCGGATCTCTAAAAA

AACTCTCTCCGGGTCACATCCTCCA

il risultato dovrebbe essere

Codice:

Stringhe

CTGTGCGG
CTCTCCGG

di lunghezza 8 trovate alle posizioni 6 e 4

e invece è:

A questo punto, visti i tempi ridicolmente brevi che stiamo ottenendo, non sarebbe il caso di provare su file di dimensione maggiore?

gugoXX · 12-08-2008, 16:20

Quote:

Originariamente inviato da Vincenzo1968

Ciao Gugo,

...

il risultato dovrebbe essere

Codice:

Stringhe

CTGTGCGG
CTCTCCGG

di lunghezza 8 trovate alle posizioni 6 e 4

e invece è:

A questo punto, visti i tempi ridicolmente brevi che stiamo ottenendo, non sarebbe il caso di provare su file di dimensione maggiore?

Mi sa che io ho trovato per primo l'altro risultato.
Sono entrambi validi vero? e sono lunghi identicamente 8.
Va bene per provare ad allungare, se vuoi stasera (o domani penso), posso buttare giu' un paio di file da una decina di milioni di caratteri.
PS: Bello l'output colorato vero?

Vincenzo1968 · 12-08-2008, 16:32

Quote:

Originariamente inviato da gugoXX

Mi sa che io ho trovato per primo l'altro risultato.
Sono entrambi validi vero? e sono lunghi identicamente 8.
Va bene per provare ad allungare, se vuoi stasera (o domani penso), posso buttare giu' un paio di file da una decina di milioni di caratteri.
PS: Bello l'output colorato vero?

Si, sono entrambi validi e si, bellissimo l'output colorato

Vincenzo1968 · 12-08-2008, 19:30

Ho effettuato delle prove su file dieci volte più grandi. Praticamente ho tolto le due stringhe dai file originali; ho copiato e incollato i due file per dieci volte; infine, ho reinserito le stringhe in due punti a caso.

Questi sono i risultati:

Versione C#(GugoXX):

Versione C(mia):

Per effettuare i test ho modificato la main della versione C# in modo da fargli comprendere i tempi di lettura dai file(come faccio io nella versione C):

Codice:

...
            Stopwatch sw = new Stopwatch();
            sw.Start();

            input1 = File.ReadAllText("DNA1E.txt");
            input2 = File.ReadAllText("DNA2E.txt");

            //Randomizer rnd = new Randomizer();
            //rnd.SetRandomTest();
            //input1 = rnd.input1;
            //input2 = rnd.input2;

            Run Current = TryV5.Get(input1, input2, 2);
            sw.Stop();
...

gugoXX · 12-08-2008, 19:51

E' bene non inserire i tempi di caricamento per evitare gli effetti della system cache, che possono cambiare i risultati di parecchio.
Ora che hai separato la funzione di calcolo dovrebbe essere fattibile anche per te circondare solo quella con il timer.

Vincenzo1968 · 12-08-2008, 20:47

Sono riuscito a scendere sotto i due secondi utilizzando una hash table al posto dell'albero binario:

Questo è il codice:

Codice:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <malloc.h>
#include <time.h>


//#define FILE1 "dna1.txt"
//#define FILE2 "dna2.txt"

//#define FILE1 "dna1b.txt"
//#define FILE2 "dna2b.txt"

//#define FILE1 "dna1c.txt"
//#define FILE2 "dna2c.txt"

//#define FILE1 "dna1d.txt"
//#define FILE2 "dna2d.txt"

#define FILE1 "dna1e.txt"
#define FILE2 "dna2e.txt"

#define BUFFER_SIZE 4096
#define MAX_STACK 100
#define PREFIX_SIZE 8

#define DIM_HASHTABLE 131071

typedef struct tagRisultato
{
	int pos1;
	int pos2;
	char p1[1024];
	char p2[1024];
	int len;
	double tempo;
} Risultato;

typedef struct tagLista
{
	int pos;
	struct tagLista* next;
} Lista;

typedef struct tagHashTable
{
	char key[PREFIX_SIZE + 1];
	Lista *pLista;
	struct tagHashTable *next;
} HashTable;

Lista* ListNewNode(int val);
Lista* ListAppend(Lista* first, int val);
void ListFree(Lista* first);

int HashU(char *v, int M);

HashTable* HashTableNewNode(char *Key, int pos)
{
	HashTable *n;

	n = (HashTable*)malloc(sizeof(HashTable));

	if( n == NULL )
		return NULL;

	strcpy(n->key, Key);
	n->pLista = NULL;
	n->pLista = ListAppend(n->pLista, pos);
	n->next = NULL;

	return n;
}

int FindValue(HashTable **pHashTable, char *Key, int M)
{
	int index = 0;
	HashTable *t;
	int a = 31415;
	int b = 27183;
	char *s = Key;

	//for ( index = 0; *s != '\0'; s++, a = a*b % (M - 1) )
	//	index = (a*index + *s) % M;
	//if ( index < 0 )
	//	index *= -1;

	for(; *s != '\0'; s++)
		index = (a*index + *s) % M;
	if ( index < 0 )
		index *= -1;

	//printf("%s -> %d\n", Key, index);

	t = pHashTable[index];
	while ( t != NULL )
	{
		if ( strcmp(t->key, Key) == 0 )
			return index;
		t = t->next;
	}

	return -1;
}

void InsertValue(HashTable **pHashTable, char *Key, int pos, int M)
{
	int index = 0;
	HashTable *t = NULL;
	int a = 31415;
	int b = 27183;
	char *s = Key;

	//for ( index = 0; *s != '\0'; s++, a = a*b % (M - 1) )
	//	index = (a*index + *s) % M;
	//if ( index < 0 )
	//	index *= -1;

	for(; *s != '\0'; s++)
		index = (a*index + *s) % M;
	if ( index < 0 )
		index *= -1;

	//printf("%s -> %d\n", Key, index);

	t = pHashTable[index];
	if ( t == NULL )
	{
		pHashTable[index] = HashTableNewNode(Key, pos);
		return;
	}

	while ( t != NULL )
	{
		if ( strcmp(t->key, Key) == 0 )
		{
			pHashTable[index]->pLista = ListAppend(pHashTable[index]->pLista, pos);
			return;
		}
		if ( t->next == NULL )
		{
			t->next = HashTableNewNode(Key, pos);
			t = t->next;
			t->next = NULL;
			return;
		}
		t = t->next;
	}
}

Lista* ListNewNode(int val)
{
	Lista *n;

	n = (Lista *)malloc(sizeof(Lista));

	if( n == NULL )
		return NULL;

	n->pos = val;
	n->next = NULL;

	return n;
}

Lista* ListAppend(Lista* first, int val)
{
	Lista *n = first, *nuovo;

	if ( first == NULL )
		return ListNewNode(val);

	n = first;
	while( n->next != NULL )
	{
		n = n->next;
	}

	nuovo = ListNewNode(val);
	n->next = nuovo;

	return first;
}

void ListFree(Lista* first)
{
	Lista *n1 = first, *n2;
	while ( n1 != NULL )
	{
		n2 = n1->next;
		free(n1);
		n1 = n2;
	}
}

void HashTableFree(HashTable* first)
{
	HashTable *n1 = first, *n2;
	while ( n1 != NULL )
	{
		n2 = n1->next;
		ListFree(n1->pLista);
		free(n1);
		n1 = n2;
	}
}

int LeggiDimensioniFile(char *szFileName)
{
	FILE *fp;
	int numread = 0;
	int dimFile = 0;
	int count = 0;
	fpos_t pos;

	fp = fopen(szFileName, "rb");

	if ( fp == NULL )
		return 0;

	if ( fseek(fp, 0, SEEK_END) )
	{
		fclose(fp);
		return 0;
	}

	if( fgetpos(fp, &pos) != 0 )
	{
		fclose(fp);
		return 0;
	}

	fclose(fp);

	return (int)pos;
}

int LeggiStringa(char *szFileName, char *buffer, int dimFile)
{
	FILE *fp;

	fp = fopen(szFileName, "r");

	if ( fp == NULL )
		return 0;

	if ( fgets(buffer, dimFile+1, fp) == NULL )
	{
		printf("\nErrore nella lettura del file %s\n", szFileName);
		fclose(fp);
		return 0;
	}
	*(buffer + dimFile) = '\0';

	fclose(fp);

	return dimFile;
}

void Trova(char *szNomeFile1, char *szNomeFile2, Risultato *pRisultato)
{
	FILE *fp;
	char strTempo[512];

	//Tree *pTree;
	//Tree *pNode;

	HashTable *pHT[DIM_HASHTABLE];
	Lista *pNode;

	Lista *pLista;
	int p1_len, p2_len;
	int k;
	int len, len_prec;
	int numErrors;
	int bTrovato;
	int MaxPrefix;
	int index;

	int x;

	char strSearch[1024] = "";

	char strRes[1024] = "";
	char strTemp1[1024] = "";
	char strTemp2[1024] = "";

	clock_t c_start, c_end;

	int pos1, pos2;
	int pos1x, pos2x;

	int post1, post2;

	char *p1 = NULL;
	char *p2 = NULL;

	c_start = clock();

	p1_len = LeggiDimensioniFile(szNomeFile1);
	p2_len = LeggiDimensioniFile(szNomeFile2);

	p1 = (char*)malloc(sizeof(char)*p1_len + 1);
	if ( !p1 )
	{
		printf("Errore nell'allocazione della memoria.");
		return;
	}
	p1[0] = '\0';

	p2 = (char*)malloc(sizeof(char)*p2_len + 1);
	if ( !p2 )
	{
		printf("Errore nell'allocazione della memoria.");
		return;
	}
	p2[0] = '\0';

	if ( !LeggiStringa(szNomeFile1, p1, p1_len) )
		return;

	if ( !LeggiStringa(szNomeFile2, p2, p2_len) )
		return;

	if ( PREFIX_SIZE < p2_len )
		MaxPrefix = PREFIX_SIZE;
	else
		MaxPrefix = p2_len;

	if ( p1_len < PREFIX_SIZE )
	{
		printf("\n\nLa stringa piu' lunga risulta composta da %d caratteri.\nFai prima se ti apri i due file con blocco note e te le cerchi da solo.\nCiao ciao.\n\n", p1_len);

		pRisultato->len = 0;
		pRisultato->pos1 = 0;
		pRisultato->pos2 = 0;
		strcpy(pRisultato->p1, strTemp1);
		strcpy(pRisultato->p2, strTemp2);
		pRisultato->tempo = 0;

		free(p1);
		free(p2);

		return;
	}

	for (k = 0; k < DIM_HASHTABLE; k++ )
		pHT[k] = NULL;

	for ( x = MaxPrefix; x > 0; x--)
	{
		for ( k = 0; k < DIM_HASHTABLE; k++ )
		{
			if ( pHT[k] != NULL )
				HashTableFree(pHT[k]);
		}

		pos1 = 0;
		while ( pos1 < p1_len - PREFIX_SIZE )
		{
			memcpy(strSearch, p1 + pos1, PREFIX_SIZE); 
			*(strSearch + PREFIX_SIZE) = '\0';

			InsertValue(pHT, strSearch, pos1, DIM_HASHTABLE);

			pos1++;
		}

		pos1 = 0;
		pos2 = 0;
		strTemp1[0] = '\0';
		strTemp2[0] = '\0';
		strSearch[0] = '\0';
		pLista = NULL;
		pNode = NULL;
		len = 0;
		len_prec = 0;
		numErrors = 0;
		bTrovato = 0;

		while ( pos2 < p2_len - x )
		{
			memcpy(strSearch, p2 + pos2, x);
			*(strSearch + x) = '\0';

			index = FindValue(pHT, strSearch, DIM_HASHTABLE);
			if ( index >= 0 )
			{
				pLista = pHT[index]->pLista;

				while ( pLista != NULL )
				{
					pos1 = pLista->pos;
					numErrors = 0;
					k = x;
					post1 = pos1;
					post2 = pos2;
					if ( pos2 > x && pos1 > x )
					{
						k = 0;
						while ( (*(p2 + pos2) == *(p1 + pos1)) || (numErrors < 2) )
						{
							if ( *(p2 + pos2) != *(p1 + pos1) )
								numErrors++;
							pos1--;
							pos2--;
							if ( pos2 < 0 || pos1 < 0 )
								break;
						}
						pos1 += 1;
						pos2 += 1;
						numErrors = 0;
					}
					while ( (*(p2 + pos2 + k) == *(p1 + pos1 + k)) || (numErrors < 2) )
					{
						if ( *(p2 + pos2 + k) != *(p1 + pos1 + k) )
							numErrors++;
						k++;
					}
					if ( k > len_prec )
					{
						len = k;
						pos1x = pos1;
						pos2x = pos2;
						len_prec = len;

						bTrovato = 1;
					}
					pos2 = post2;
					pLista = pLista->next;
				}
			}

			pos2++;
		}
		if ( bTrovato )
			break;
	}

	pos1 = pos1x;
	pos2 = pos2x;
	len_prec = len;
	numErrors = 0;
	while ( (*(p1 + pos1) == *(p2 + pos2)) || (numErrors < 2) )
	{
		if ( *(p2 + pos2) != *(p1 + pos1) )
			numErrors++;
		pos2++;
		pos1++;
		if ( numErrors == 2 )
			break;
	}
	len = 0;
	numErrors = 0;
	while ( (*(p1 + pos1 + len) == *(p2 + pos2 + len)) || (numErrors < 2) )
	{
		if ( *(p2 + pos2 + len) != *(p1 + pos1 + len) )
			numErrors++;
		len++;
	}
	if ( len_prec < len )
	{
		pos1x = pos1;
		pos2x = pos2;
	}
	else
	{
		len = len_prec;
	}

	memcpy(strTemp1, p1 + pos1x, len);
	*(strTemp1 + len) = '\0';

	memcpy(strTemp2, p2 + pos2x, len);
	*(strTemp2 + len) = '\0';

	sprintf(strRes,
			"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
			strTemp1,
			strTemp2,
			len,
			pos1x,
			pos2x);

	pRisultato->len = len;
	pRisultato->pos1 = pos1x;
	pRisultato->pos2 = pos2x;
	strcpy(pRisultato->p1, strTemp1);
	strcpy(pRisultato->p2, strTemp2);

	x = len_prec + 1;

	pos1 = 0;
	pos2 = pos2x + 1;
	strTemp1[0] = '\0';
	strSearch[0] = '\0';
	len_prec = len;
	len = 0;
	numErrors = 0;

	memcpy(strSearch, p2 + pos2, x);
	*(strSearch + x) = '\0';

	k = 0;
	while ( *(p1 + k) != *(p2 + k) )
		k++;
	pos1 = k;

	while ( pos1 < p1_len - x )
	{	
		memcpy(strTemp1, p1 + pos1, x);
		*(strTemp1 + x) = '\0';

		k = 0;
		while ( k < x )
		{
			if ( *(strSearch + k) != *(strTemp1 + k) )
				numErrors++;
			if ( numErrors > 2 )
				break;
			k++;
		}
		if ( k >= x && len_prec > len )
		{
			len = k;
			pos1x = pos1;
			pos2x = pos2;
			len_prec = len;

			memcpy(strTemp1, p1 + pos1x, len);
			*(strTemp1 + len) = '\0';

			memcpy(strTemp2, p2 + pos2x, len);
			*(strTemp2 + len) = '\0';

			sprintf(strRes,
					"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
					strTemp1,
					strTemp2,
					len,
					pos1x,
					pos2x);

			pRisultato->len = len;
			pRisultato->pos1 = pos1x;
			pRisultato->pos2 = pos2x;
			strcpy(pRisultato->p1, strTemp1);
			strcpy(pRisultato->p2, strTemp2);

			break;
		}

		pos1++;
	}

	c_end = clock();

	sprintf(strTempo, "\nTempo impiegato -> %5.5f secondi\n", (double)(c_end - c_start) / CLOCKS_PER_SEC);

	pRisultato->tempo = (double)(c_end - c_start) / CLOCKS_PER_SEC;

	fp = fopen("Risultato.txt", "a");
	fwrite(strRes, strlen(strRes), 1, fp);
	fwrite(strTempo, strlen(strTempo), 1, fp);
	fclose(fp);

	free(p1);
	free(p2);

	for ( k = 0; k < DIM_HASHTABLE; k++ )
		HashTableFree(pHT[k]);
}

int main()
{
	Risultato ris;
	char strTempo[512];
	char strRes[1024];

	Trova(FILE1, FILE2, &ris);

	sprintf(strRes,
			"\nStringhe\n\n%s\n%s\n\ndi lunghezza %d trovate alle posizioni %d e %d\n",
			ris.p1,
			ris.p2,
			ris.len,
			ris.pos1,
			ris.pos2);

	printf(strRes);

	sprintf(strTempo, "\nTempo impiegato -> %5.5f secondi\n", ris.tempo);
	printf(strTempo);

	return 0;
}

Vincenzo1968 · 12-08-2008, 20:58

Quote:

Originariamente inviato da gugoXX

E' bene non inserire i tempi di caricamento per evitare gli effetti della system cache, che possono cambiare i risultati di parecchio.
Ora che hai separato la funzione di calcolo dovrebbe essere fattibile anche per te circondare solo quella con il timer.

Secondo me, relativamente a questo contest, è importante considerare anche i tempi di lettura dai file.
Metti, per esempio, che un biologo debba esaminare in sequenza mille coppie di file.

Vincenzo1968 · 12-08-2008, 21:17

Una precisazione.
Nel secondo file, ho si spostato la stringa in una posizione diversa, ma mi sono dimenticato di fare il copia e incolla per dieci volte.

I risultati sono quindi da considerarsi sul file 1 che è dieci volte più grande di quello originale, mentre il file B, in dimensioni, è rimasto uguale.

I due file si possono scaricare da qui.

gugoXX · 12-08-2008, 23:54

Da me ci mette

Quote:

Maximum common length 29
Maximum substring indexes 974640,9235
Substring1: ACTGTCCTGAAGATCGCTTGGCATCTCCG
Substring2: ACTGTCCTGCAGATCGCTTTGCATCTCCG
1364ms

Anche considerando la lettura dal file.
E tu mi dirai: ma dobbiamo fare le prove sulla stessa macchina...
e io ti diro': se il tuo dottore ha deciso di includere la lettura dei file, il mio ha deciso di usare la mia macchina.

Seriamente, la lettura del file e' da levare. Se eseguo 2 volte consecutive il programma, la prima volta ci mette 1.8sec.
la seconda volta, trovandosi i file in cache ce ne mette 1.364
Ovviamente e' impossibile riuscire a tirare fuori dei dati attendibili.
Cercherei di concentrasi sui tempi degli algoritmi, tralasciando quanto piu' possibile il rumore, sia per questo che per altri Contest.

Proviamo con questo
http://www.usaupload.net/d/1j5ruhh7wyb

Vincenzo1968 · 13-08-2008, 03:45

Quote:

Originariamente inviato da gugoXX

Da me ci mette

Anche considerando la lettura dal file.
E tu mi dirai: ma dobbiamo fare le prove sulla stessa macchina...
e io ti diro': se il tuo dottore ha deciso di includere la lettura dei file, il mio ha deciso di usare la mia macchina.

Seriamente, la lettura del file e' da levare. Se eseguo 2 volte consecutive il programma, la prima volta ci mette 1.8sec.
la seconda volta, trovandosi i file in cache ce ne mette 1.364
Ovviamente e' impossibile riuscire a tirare fuori dei dati attendibili.
Cercherei di concentrasi sui tempi degli algoritmi, tralasciando quanto piu' possibile il rumore, sia per questo che per altri Contest.

Proviamo con questo
http://www.usaupload.net/d/1j5ruhh7wyb

Ok, ho tolto la lettura dai file sia dal mio che dal tuo programma.

Questo è il risultato mio:

Col tuo programma, dopo cinque minuti di vana attesa, ho dovuto riavviare windows che risulta enormemente rallentato(e ho provato più volte a riavviare il sistema e lanciare l'applicazione con nessun'altra finestra aperta).
Quanto cabasisi di memoria usa il tuo programma?

Questa è la mia macchina:

Vincenzo1968 · 13-08-2008, 04:59

Ho fatto un ultimo tentativo:

1) ho riavviato il sistema
2) ho lanciato la tua applicazione
3) mi sono vestito, ho preso la macchina, e sono andato a mangiarmi i cornetti caldi a Cefalù(circa 3 Km da dove abito).
4) sono tornato e non ho avuto, purtroppo, il piacere di vedere i risultati sulla console.

Il codice che ho usato è questo:

Codice:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Diagnostics;
using System.IO;

namespace Contest04BGugo
{
    public class Program
    {
        static void Main(string[] args)
        {
            string input1 = "";
            string input2 = "";

            input1 = File.ReadAllText("DNA1F.txt");
            input2 = File.ReadAllText("DNA2F.txt");

            //Randomizer rnd = new Randomizer();
            //rnd.SetRandomTest();
            //input1 = rnd.input1;
            //input2 = rnd.input2;

            Stopwatch sw = new Stopwatch();
            sw.Start();
            Run Current = TryV5.Get(input1, input2, 2);
            sw.Stop();

            Console.ForegroundColor = ConsoleColor.White;
            Console.WriteLine("Maximum common length {0}", Current.length);
            Console.WriteLine("Maximum substring indexes {0},{1}", Current.index0, Current.index1);

            string s0 = input1.Substring(Current.index0, Current.length);
            string s1 = input2.Substring(Current.index1, Current.length);

            Console.Write("Substring1: ");
            for (int t = 0; t < Current.length; t++)
            {
                if (s0[t] == s1[t]) Console.ForegroundColor = ConsoleColor.White;
                else Console.ForegroundColor = ConsoleColor.Red;
                Console.Write(s0[t]);
            }
            Console.WriteLine();
            Console.Write("Substring2: ");
            for (int t = 0; t < Current.length; t++)
            {
                if (s0[t] == s1[t]) Console.ForegroundColor = ConsoleColor.White;
                else Console.ForegroundColor = ConsoleColor.Red;
                Console.Write(s1[t]);
            }
            Console.WriteLine();

            Console.WriteLine("{0}ms", sw.ElapsedMilliseconds);
            Console.ReadKey();
        }
    }

    public class TryV5
    {
        static string input1;
        static string input2;
        static int len1;
        static int len2;
        static int ERR;

        public static Run Get(string i_input1, string i_input2, int i_ERR)
        {
            input1 = i_input1;
            input2 = i_input2;
            if (input2.Length > input1.Length)
            {
                string tmp = input1;
                input1 = input2;
                input2 = tmp;
            }

            ERR = i_ERR;

            len1 = input1.Length;
            len2 = input2.Length;

            var Upper = new Dictionary<int, Dictionary<string, List<int>>>(100);
            var Lower = new Dictionary<int, Dictionary<string, List<int>>>(100);

            int length = (int)(Math.Log(len1, 4));
            //int length = 6;
            bool stay;
            do
            {
                length++;

                Dictionary<string, List<int>> mup, mdw;
                Get2Dicts(length, out mup, out mdw);
                Upper[length] = mup;
                Lower[length] = mdw;

                var tst = from m in mup
                          join n in mdw on m.Key equals n.Key
                          select true;

                int lntest = Math.Min(mup.Count, 1 << 19);
                int take = tst.Take(lntest).Count();
                stay = (take == lntest);
                if (take == 0) length -= 2;
            } while (stay);

            Run Winner = new Run();
            int minlen = GetMinLen(length);

            for (int ln = length; ln >= minlen; ln--)
            {
                Dictionary<string, List<int>> mup;
                Dictionary<string, List<int>> mdw;
                if (Upper.ContainsKey(ln))
                {
                    mup = Upper[ln];
                    mdw = Lower[ln];
                }
                else
                {
                    Get2Dicts(ln, out mup, out mdw);
                }

                // Parallelismo qui
                var tst = from m in mup
                          join n in mdw on m.Key equals n.Key
                          from u in m.Value
                          from d in n.Value
                          select Search(u, d, ln);

                Run best = tst.Max();
                if (best.length > Winner.length)
                {
                    Winner = best;
                    minlen = GetMinLen(best.length);
                    if (minlen > ln) break;
                }
            }
            return Winner;
        }

        public static void Get2Dicts(int length, out Dictionary<string, List<int>> mup, out Dictionary<string, List<int>> mdw)
        {
            // Inserimento parallelismo qui
            mup = BuildDict(input1, length);
            mdw = BuildDict(input2, length);
        }

        public static int GetMinLen(int curmaxlen)
        {
            int minlen = (int)Math.Ceiling((float)(curmaxlen) / (float)(ERR + 1));
            return minlen;
        }

        private static Run Search(int upoffset, int downoffset, int sure)
        {
            int[] Pre = new int[ERR + 1];
            int[] Post = new int[ERR + 2];

            //SearchPost

            bool lasterr = false;
            for (int erfnd = 0, pch1 = upoffset + sure, pch2 = downoffset + sure; erfnd <= ERR && pch1 < len1 && pch2 < len2; pch1++, pch2++)
            {
                char ch1 = input1[pch1];
                char ch2 = input2[pch2];
                if (ch1 == ch2)
                {
                    Post[erfnd]++;
                    lasterr = false;
                }
                else
                {
                    if (lasterr) break;
                    erfnd++;
                    lasterr = true;
                }
            }
            sure += Post[0];

            //SearchPre
            lasterr = true;
            for (int erfnd = 0, pch1 = upoffset - 2, pch2 = downoffset - 2; erfnd < ERR && pch1 >= 0 && pch2 >= 0; pch1--, pch2--)
            {
                char ch1 = input1[pch1];
                char ch2 = input2[pch2];
                if (ch1 == ch2)
                {
                    Pre[erfnd]++;
                    lasterr = false;
                }
                else
                {
                    if (lasterr) break;
                    erfnd++;
                    lasterr = true;
                }
            }

            int pr = 0;
            int po = 1;
            for (int t = 0; t < ERR; t++)
            {
                int ppd = Post[po];
                int ppr = Pre[pr];
                if ((ppd == 0) && (ppr == 0)) break;
                if (ppd > ppr) po++;
                else pr++;
            }

            int ofprim = 0;
            for (int t = 0; t < pr; t++)
            {
                ofprim += (Pre[t] + 1);
            }

            int ofdop = 0;
            for (int t = 1; t < po; t++)
            {
                ofdop += (Post[t] + 1);
            }

            int len = sure + ofprim + ofdop;
            return new Run(len, upoffset - ofprim, downoffset - ofprim);
        }

        private static Dictionary<string, List<int>> BuildDict(string input, int len)
        {
            int fin = input.Length - len;
            var ret = new Dictionary<string, List<int>>(fin);

            for (int t = 0; t < fin; t++)
            {
                string str = input.Substring(t, len);
                List<int> adder;
                if (!ret.TryGetValue(str, out adder))
                {
                    adder = ret[str] = new List<int>();
                }
                adder.Add(t);
            }
            return ret;
        }
    }

    public class Run : IComparable<Run>
    {
        public int index0;
        public int index1;
        public int length;
        public Run()
        {
            index0 = int.MinValue;
            index1 = int.MinValue;
            length = int.MinValue;
        }

        public Run(int len, int i0, int i1)
        {
            length = len;
            index0 = i0;
            index1 = i1;
        }

        #region IComparable<Run> Members

        public int CompareTo(Run other)
        {
            return length - other.length;
        }

        #endregion
    }
}

12-08-2008, 19:51	#114
gugoXX Senior Member Iscritto dal: May 2004 Città: Londra (Torino) Messaggi: 3691	E' bene non inserire i tempi di caricamento per evitare gli effetti della system cache, che possono cambiare i risultati di parecchio. Ora che hai separato la funzione di calcolo dovrebbe essere fattibile anche per te circondare solo quella con il timer. __________________ Se pensi che il tuo codice sia troppo complesso da capire senza commenti, e' segno che molto probabilmente il tuo codice e' semplicemente mal scritto. E se pensi di avere bisogno di un nuovo commento, significa che ti manca almeno un test.

12-08-2008, 21:17	#117
Vincenzo1968 Bannato Iscritto dal: Mar 2008 Città: Villabate(PA) Messaggi: 2515	Una precisazione. Nel secondo file, ho si spostato la stringa in una posizione diversa, ma mi sono dimenticato di fare il copia e incolla per dieci volte. I risultati sono quindi da considerarsi sul file 1 che è dieci volte più grande di quello originale, mentre il file B, in dimensioni, è rimasto uguale. I due file si possono scaricare da qui. Ultima modifica di Vincenzo1968 : 12-08-2008 alle 21:31.

12-08-2008, 03:55	#108
Vincenzo1968 Bannato Iscritto dal: Mar 2008 Città: Villabate(PA) Messaggi: 2515	Ho effettuato parecchie prove con entrambi i compilatori, con stringhe diverse e in diverse posizioni, e sembrerebbe tutto a posto. Per esempio, per il punto B: Cionci, ti sarei grato se volessi confermare i risultati anche col tuo compilatore(se non ricordo male usi gcc in ambiente Unix/Linux?).

12-08-2008, 16:08	#110
Vincenzo1968 Bannato Iscritto dal: Mar 2008 Città: Villabate(PA) Messaggi: 2515	Ciao Gugo, sulla mia macchina risulta leggermente più lento: e ho trovato qualche piccola incongruenza. Per esempio, se nei due file mettiamo le stringhe indicate da cionci: CCACTGCTGTGCGGATCTCTAAAAA AACTCTCTCCGGGTCACATCCTCCA il risultato dovrebbe essere Codice: Stringhe CTGTGCGG CTCTCCGG di lunghezza 8 trovate alle posizioni 6 e 4 e invece è: A questo punto, visti i tempi ridicolmente brevi che stiamo ottenendo, non sarebbe il caso di provare su file di dimensione maggiore?

12-08-2008, 19:30	#113
Vincenzo1968 Bannato Iscritto dal: Mar 2008 Città: Villabate(PA) Messaggi: 2515	Ho effettuato delle prove su file dieci volte più grandi. Praticamente ho tolto le due stringhe dai file originali; ho copiato e incollato i due file per dieci volte; infine, ho reinserito le stringhe in due punti a caso. Questi sono i risultati: Versione C#(GugoXX): Versione C(mia): Per effettuare i test ho modificato la main della versione C# in modo da fargli comprendere i tempi di lettura dai file(come faccio io nella versione C): Codice: ... Stopwatch sw = new Stopwatch(); sw.Start(); input1 = File.ReadAllText("DNA1E.txt"); input2 = File.ReadAllText("DNA2E.txt"); //Randomizer rnd = new Randomizer(); //rnd.SetRandomTest(); //input1 = rnd.input1; //input2 = rnd.input2; Run Current = TryV5.Get(input1, input2, 2); sw.Stop(); ...

Strumenti
Mostra una versione stampabile Invia questa pagina per email